{
  "dd_meta_major_version": 1,
  "dd_meta_minor_version": 4,
  "state_table_updates": [
    {
      "state_table_idx": 0,
      "update_func": 1,
      "update_arg": 1
    }
  ],
  "op_list": [
    {
      "name": "MatMulNBits_2_0",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/input_layernorm/output_0.out5_4_0"
      ],
      "const_args": [
        "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.0.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/input_layernorm/output_0.out5_4_0"
      ],
      "const_args": [
        "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.0.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "3",
            "1"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.0/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0",
        "past_key_values.0.key",
        "past_key_values.0.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0",
        "present.0.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "0",
            "0",
            "2",
            "0",
            "1",
            "1",
            "6",
            "0",
            "2",
            "0"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.0.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0"
      ],
      "const_args": [
        "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_0",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/embed_tokens/Gather/output_0.out4_0",
        "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1"
      ],
      "const_args": [
        "model.layers.0.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.0/post_attention_layernorm/output_3.out4_0",
        "/model/layers.0/post_attention_layernorm/output_0.out4_0"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_0",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.0/post_attention_layernorm/output_0.out4_0"
      ],
      "const_args": [
        "model.layers.0.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.0/mlp/Mul/output_0.out3_0"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.0.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/mlp/Mul/output_0.out3_0"
      ],
      "const_args": [
        "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_1",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.0/post_attention_layernorm/output_3.out4_0",
        "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2"
      ],
      "const_args": [
        "model.layers.1.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.1/input_layernorm/output_3.out4_1",
        "/model/layers.1/input_layernorm/output_0.out4_1"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_1",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/input_layernorm/output_0.out4_1"
      ],
      "const_args": [
        "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.1.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/input_layernorm/output_0.out4_1"
      ],
      "const_args": [
        "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.1.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "7",
            "3"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.1/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3",
        "past_key_values.1.key",
        "past_key_values.1.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1",
        "present.1.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "4",
            "2",
            "2",
            "0",
            "5",
            "3",
            "6",
            "0",
            "6",
            "2"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.1.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1"
      ],
      "const_args": [
        "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_2",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.1/input_layernorm/output_3.out4_1",
        "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4"
      ],
      "const_args": [
        "model.layers.1.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.1/post_attention_layernorm/output_3.out4_2",
        "/model/layers.1/post_attention_layernorm/output_0.out4_2"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_1",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.1/post_attention_layernorm/output_0.out4_2"
      ],
      "const_args": [
        "model.layers.1.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.1/mlp/Mul/output_0.out3_1"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.1.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/mlp/Mul/output_0.out3_1"
      ],
      "const_args": [
        "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_3",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.1/post_attention_layernorm/output_3.out4_2",
        "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5"
      ],
      "const_args": [
        "model.layers.2.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.2/input_layernorm/output_3.out4_3",
        "/model/layers.2/input_layernorm/output_0.out4_3"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_2",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/input_layernorm/output_0.out4_3"
      ],
      "const_args": [
        "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.2.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/input_layernorm/output_0.out4_3"
      ],
      "const_args": [
        "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.2.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "11",
            "5"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.2/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6",
        "past_key_values.2.key",
        "past_key_values.2.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2",
        "present.2.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "8",
            "4",
            "2",
            "0",
            "9",
            "5",
            "6",
            "0",
            "10",
            "4"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.2.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2"
      ],
      "const_args": [
        "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_4",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.2/input_layernorm/output_3.out4_3",
        "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7"
      ],
      "const_args": [
        "model.layers.2.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.2/post_attention_layernorm/output_3.out4_4",
        "/model/layers.2/post_attention_layernorm/output_0.out4_4"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_2",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.2/post_attention_layernorm/output_0.out4_4"
      ],
      "const_args": [
        "model.layers.2.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.2/mlp/Mul/output_0.out3_2"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.2.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/mlp/Mul/output_0.out3_2"
      ],
      "const_args": [
        "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_5",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.2/post_attention_layernorm/output_3.out4_4",
        "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8"
      ],
      "const_args": [
        "model.layers.3.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.3/input_layernorm/output_3.out4_5",
        "/model/layers.3/input_layernorm/output_0.out4_5"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_3",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/input_layernorm/output_0.out4_5"
      ],
      "const_args": [
        "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.3.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/input_layernorm/output_0.out4_5"
      ],
      "const_args": [
        "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.3.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "15",
            "7"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.3/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9",
        "past_key_values.3.key",
        "past_key_values.3.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3",
        "present.3.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "12",
            "6",
            "2",
            "0",
            "13",
            "7",
            "6",
            "0",
            "14",
            "6"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.3.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3"
      ],
      "const_args": [
        "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_6",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.3/input_layernorm/output_3.out4_5",
        "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10"
      ],
      "const_args": [
        "model.layers.3.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.3/post_attention_layernorm/output_3.out4_6",
        "/model/layers.3/post_attention_layernorm/output_0.out4_6"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_3",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.3/post_attention_layernorm/output_0.out4_6"
      ],
      "const_args": [
        "model.layers.3.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.3/mlp/Mul/output_0.out3_3"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.3.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/mlp/Mul/output_0.out3_3"
      ],
      "const_args": [
        "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_7",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.3/post_attention_layernorm/output_3.out4_6",
        "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11"
      ],
      "const_args": [
        "model.layers.4.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.4/input_layernorm/output_3.out4_7",
        "/model/layers.4/input_layernorm/output_0.out4_7"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_4",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/input_layernorm/output_0.out4_7"
      ],
      "const_args": [
        "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.4.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/input_layernorm/output_0.out4_7"
      ],
      "const_args": [
        "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.4.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "19",
            "9"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.4/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12",
        "past_key_values.4.key",
        "past_key_values.4.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4",
        "present.4.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "16",
            "8",
            "2",
            "0",
            "17",
            "9",
            "6",
            "0",
            "18",
            "8"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.4.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4"
      ],
      "const_args": [
        "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_8",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.4/input_layernorm/output_3.out4_7",
        "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13"
      ],
      "const_args": [
        "model.layers.4.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.4/post_attention_layernorm/output_3.out4_8",
        "/model/layers.4/post_attention_layernorm/output_0.out4_8"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_4",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.4/post_attention_layernorm/output_0.out4_8"
      ],
      "const_args": [
        "model.layers.4.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.4/mlp/Mul/output_0.out3_4"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.4.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/mlp/Mul/output_0.out3_4"
      ],
      "const_args": [
        "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_9",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.4/post_attention_layernorm/output_3.out4_8",
        "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14"
      ],
      "const_args": [
        "model.layers.5.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.5/input_layernorm/output_3.out4_9",
        "/model/layers.5/input_layernorm/output_0.out4_9"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_5",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/input_layernorm/output_0.out4_9"
      ],
      "const_args": [
        "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.5.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/input_layernorm/output_0.out4_9"
      ],
      "const_args": [
        "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.5.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "23",
            "11"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.5/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15",
        "past_key_values.5.key",
        "past_key_values.5.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5",
        "present.5.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "20",
            "10",
            "2",
            "0",
            "21",
            "11",
            "6",
            "0",
            "22",
            "10"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.5.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5"
      ],
      "const_args": [
        "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_10",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.5/input_layernorm/output_3.out4_9",
        "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16"
      ],
      "const_args": [
        "model.layers.5.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.5/post_attention_layernorm/output_3.out4_10",
        "/model/layers.5/post_attention_layernorm/output_0.out4_10"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_5",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.5/post_attention_layernorm/output_0.out4_10"
      ],
      "const_args": [
        "model.layers.5.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.5/mlp/Mul/output_0.out3_5"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.5.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/mlp/Mul/output_0.out3_5"
      ],
      "const_args": [
        "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_11",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.5/post_attention_layernorm/output_3.out4_10",
        "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17"
      ],
      "const_args": [
        "model.layers.6.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.6/input_layernorm/output_3.out4_11",
        "/model/layers.6/input_layernorm/output_0.out4_11"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_6",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/input_layernorm/output_0.out4_11"
      ],
      "const_args": [
        "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.6.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/input_layernorm/output_0.out4_11"
      ],
      "const_args": [
        "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.6.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "27",
            "13"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.6/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18",
        "past_key_values.6.key",
        "past_key_values.6.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6",
        "present.6.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "24",
            "12",
            "2",
            "0",
            "25",
            "13",
            "6",
            "0",
            "26",
            "12"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.6.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6"
      ],
      "const_args": [
        "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_12",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.6/input_layernorm/output_3.out4_11",
        "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19"
      ],
      "const_args": [
        "model.layers.6.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.6/post_attention_layernorm/output_3.out4_12",
        "/model/layers.6/post_attention_layernorm/output_0.out4_12"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_6",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.6/post_attention_layernorm/output_0.out4_12"
      ],
      "const_args": [
        "model.layers.6.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.6/mlp/Mul/output_0.out3_6"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.6.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/mlp/Mul/output_0.out3_6"
      ],
      "const_args": [
        "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_13",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.6/post_attention_layernorm/output_3.out4_12",
        "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20"
      ],
      "const_args": [
        "model.layers.7.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.7/input_layernorm/output_3.out4_13",
        "/model/layers.7/input_layernorm/output_0.out4_13"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_7",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/input_layernorm/output_0.out4_13"
      ],
      "const_args": [
        "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.7.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/input_layernorm/output_0.out4_13"
      ],
      "const_args": [
        "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.7.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "31",
            "15"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.7/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21",
        "past_key_values.7.key",
        "past_key_values.7.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7",
        "present.7.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "28",
            "14",
            "2",
            "0",
            "29",
            "15",
            "6",
            "0",
            "30",
            "14"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.7.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7"
      ],
      "const_args": [
        "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_14",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.7/input_layernorm/output_3.out4_13",
        "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22"
      ],
      "const_args": [
        "model.layers.7.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.7/post_attention_layernorm/output_3.out4_14",
        "/model/layers.7/post_attention_layernorm/output_0.out4_14"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_7",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.7/post_attention_layernorm/output_0.out4_14"
      ],
      "const_args": [
        "model.layers.7.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.7/mlp/Mul/output_0.out3_7"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.7.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/mlp/Mul/output_0.out3_7"
      ],
      "const_args": [
        "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_15",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.7/post_attention_layernorm/output_3.out4_14",
        "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23"
      ],
      "const_args": [
        "model.layers.8.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.8/input_layernorm/output_3.out4_15",
        "/model/layers.8/input_layernorm/output_0.out4_15"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_8",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/input_layernorm/output_0.out4_15"
      ],
      "const_args": [
        "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.8.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/input_layernorm/output_0.out4_15"
      ],
      "const_args": [
        "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.8.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "35",
            "17"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.8/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24",
        "past_key_values.8.key",
        "past_key_values.8.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8",
        "present.8.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "32",
            "16",
            "2",
            "0",
            "33",
            "17",
            "6",
            "0",
            "34",
            "16"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.8.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8"
      ],
      "const_args": [
        "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_16",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.8/input_layernorm/output_3.out4_15",
        "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25"
      ],
      "const_args": [
        "model.layers.8.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.8/post_attention_layernorm/output_3.out4_16",
        "/model/layers.8/post_attention_layernorm/output_0.out4_16"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_8",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.8/post_attention_layernorm/output_0.out4_16"
      ],
      "const_args": [
        "model.layers.8.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.8/mlp/Mul/output_0.out3_8"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.8.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/mlp/Mul/output_0.out3_8"
      ],
      "const_args": [
        "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_17",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.8/post_attention_layernorm/output_3.out4_16",
        "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26"
      ],
      "const_args": [
        "model.layers.9.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.9/input_layernorm/output_3.out4_17",
        "/model/layers.9/input_layernorm/output_0.out4_17"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_9",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/input_layernorm/output_0.out4_17"
      ],
      "const_args": [
        "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.9.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/input_layernorm/output_0.out4_17"
      ],
      "const_args": [
        "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.9.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "39",
            "19"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.9/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27",
        "past_key_values.9.key",
        "past_key_values.9.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9",
        "present.9.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "36",
            "18",
            "2",
            "0",
            "37",
            "19",
            "6",
            "0",
            "38",
            "18"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.9.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9"
      ],
      "const_args": [
        "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_18",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.9/input_layernorm/output_3.out4_17",
        "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28"
      ],
      "const_args": [
        "model.layers.9.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.9/post_attention_layernorm/output_3.out4_18",
        "/model/layers.9/post_attention_layernorm/output_0.out4_18"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_9",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.9/post_attention_layernorm/output_0.out4_18"
      ],
      "const_args": [
        "model.layers.9.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.9/mlp/Mul/output_0.out3_9"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.9.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/mlp/Mul/output_0.out3_9"
      ],
      "const_args": [
        "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_19",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.9/post_attention_layernorm/output_3.out4_18",
        "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29"
      ],
      "const_args": [
        "model.layers.10.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.10/input_layernorm/output_3.out4_19",
        "/model/layers.10/input_layernorm/output_0.out4_19"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_10",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/input_layernorm/output_0.out4_19"
      ],
      "const_args": [
        "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.10.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/input_layernorm/output_0.out4_19"
      ],
      "const_args": [
        "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.10.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "43",
            "21"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.10/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30",
        "past_key_values.10.key",
        "past_key_values.10.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10",
        "present.10.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "40",
            "20",
            "2",
            "0",
            "41",
            "21",
            "6",
            "0",
            "42",
            "20"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.10.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10"
      ],
      "const_args": [
        "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_20",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.10/input_layernorm/output_3.out4_19",
        "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31"
      ],
      "const_args": [
        "model.layers.10.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.10/post_attention_layernorm/output_3.out4_20",
        "/model/layers.10/post_attention_layernorm/output_0.out4_20"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_10",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.10/post_attention_layernorm/output_0.out4_20"
      ],
      "const_args": [
        "model.layers.10.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.10/mlp/Mul/output_0.out3_10"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.10.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/mlp/Mul/output_0.out3_10"
      ],
      "const_args": [
        "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_21",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.10/post_attention_layernorm/output_3.out4_20",
        "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32"
      ],
      "const_args": [
        "model.layers.11.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.11/input_layernorm/output_3.out4_21",
        "/model/layers.11/input_layernorm/output_0.out4_21"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_11",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/input_layernorm/output_0.out4_21"
      ],
      "const_args": [
        "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.11.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/input_layernorm/output_0.out4_21"
      ],
      "const_args": [
        "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.11.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "47",
            "23"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.11/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33",
        "past_key_values.11.key",
        "past_key_values.11.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11",
        "present.11.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "44",
            "22",
            "2",
            "0",
            "45",
            "23",
            "6",
            "0",
            "46",
            "22"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.11.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11"
      ],
      "const_args": [
        "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_22",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.11/input_layernorm/output_3.out4_21",
        "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34"
      ],
      "const_args": [
        "model.layers.11.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.11/post_attention_layernorm/output_3.out4_22",
        "/model/layers.11/post_attention_layernorm/output_0.out4_22"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_11",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.11/post_attention_layernorm/output_0.out4_22"
      ],
      "const_args": [
        "model.layers.11.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.11/mlp/Mul/output_0.out3_11"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.11.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/mlp/Mul/output_0.out3_11"
      ],
      "const_args": [
        "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_23",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.11/post_attention_layernorm/output_3.out4_22",
        "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35"
      ],
      "const_args": [
        "model.layers.12.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.12/input_layernorm/output_3.out4_23",
        "/model/layers.12/input_layernorm/output_0.out4_23"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_12",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/input_layernorm/output_0.out4_23"
      ],
      "const_args": [
        "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.12.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/input_layernorm/output_0.out4_23"
      ],
      "const_args": [
        "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.12.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "51",
            "25"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.12/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36",
        "past_key_values.12.key",
        "past_key_values.12.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12",
        "present.12.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "48",
            "24",
            "2",
            "0",
            "49",
            "25",
            "6",
            "0",
            "50",
            "24"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.12.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12"
      ],
      "const_args": [
        "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_24",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.12/input_layernorm/output_3.out4_23",
        "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37"
      ],
      "const_args": [
        "model.layers.12.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.12/post_attention_layernorm/output_3.out4_24",
        "/model/layers.12/post_attention_layernorm/output_0.out4_24"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_12",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.12/post_attention_layernorm/output_0.out4_24"
      ],
      "const_args": [
        "model.layers.12.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.12/mlp/Mul/output_0.out3_12"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.12.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/mlp/Mul/output_0.out3_12"
      ],
      "const_args": [
        "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_25",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.12/post_attention_layernorm/output_3.out4_24",
        "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38"
      ],
      "const_args": [
        "model.layers.13.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.13/input_layernorm/output_3.out4_25",
        "/model/layers.13/input_layernorm/output_0.out4_25"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_13",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/input_layernorm/output_0.out4_25"
      ],
      "const_args": [
        "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.13.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/input_layernorm/output_0.out4_25"
      ],
      "const_args": [
        "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.13.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "55",
            "27"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.13/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39",
        "past_key_values.13.key",
        "past_key_values.13.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13",
        "present.13.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "52",
            "26",
            "2",
            "0",
            "53",
            "27",
            "6",
            "0",
            "54",
            "26"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.13.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13"
      ],
      "const_args": [
        "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_26",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.13/input_layernorm/output_3.out4_25",
        "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40"
      ],
      "const_args": [
        "model.layers.13.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.13/post_attention_layernorm/output_3.out4_26",
        "/model/layers.13/post_attention_layernorm/output_0.out4_26"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_13",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.13/post_attention_layernorm/output_0.out4_26"
      ],
      "const_args": [
        "model.layers.13.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.13/mlp/Mul/output_0.out3_13"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.13.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/mlp/Mul/output_0.out3_13"
      ],
      "const_args": [
        "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_27",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.13/post_attention_layernorm/output_3.out4_26",
        "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41"
      ],
      "const_args": [
        "model.layers.14.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.14/input_layernorm/output_3.out4_27",
        "/model/layers.14/input_layernorm/output_0.out4_27"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_14",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/input_layernorm/output_0.out4_27"
      ],
      "const_args": [
        "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.14.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/input_layernorm/output_0.out4_27"
      ],
      "const_args": [
        "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.14.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "59",
            "29"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.14/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42",
        "past_key_values.14.key",
        "past_key_values.14.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14",
        "present.14.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "56",
            "28",
            "2",
            "0",
            "57",
            "29",
            "6",
            "0",
            "58",
            "28"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.14.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14"
      ],
      "const_args": [
        "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_28",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.14/input_layernorm/output_3.out4_27",
        "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43"
      ],
      "const_args": [
        "model.layers.14.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.14/post_attention_layernorm/output_3.out4_28",
        "/model/layers.14/post_attention_layernorm/output_0.out4_28"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_14",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.14/post_attention_layernorm/output_0.out4_28"
      ],
      "const_args": [
        "model.layers.14.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.14/mlp/Mul/output_0.out3_14"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.14.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/mlp/Mul/output_0.out3_14"
      ],
      "const_args": [
        "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_29",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.14/post_attention_layernorm/output_3.out4_28",
        "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44"
      ],
      "const_args": [
        "model.layers.15.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.15/input_layernorm/output_3.out4_29",
        "/model/layers.15/input_layernorm/output_0.out4_29"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_15",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/input_layernorm/output_0.out4_29"
      ],
      "const_args": [
        "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.15.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/input_layernorm/output_0.out4_29"
      ],
      "const_args": [
        "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.15.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "63",
            "31"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.15/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45",
        "past_key_values.15.key",
        "past_key_values.15.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15",
        "present.15.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "60",
            "30",
            "2",
            "0",
            "61",
            "31",
            "6",
            "0",
            "62",
            "30"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.15.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15"
      ],
      "const_args": [
        "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_30",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.15/input_layernorm/output_3.out4_29",
        "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46"
      ],
      "const_args": [
        "model.layers.15.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.15/post_attention_layernorm/output_3.out4_30",
        "/model/layers.15/post_attention_layernorm/output_0.out4_30"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_15",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.15/post_attention_layernorm/output_0.out4_30"
      ],
      "const_args": [
        "model.layers.15.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.15/mlp/Mul/output_0.out3_15"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.15.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/mlp/Mul/output_0.out3_15"
      ],
      "const_args": [
        "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_31",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.15/post_attention_layernorm/output_3.out4_30",
        "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47"
      ],
      "const_args": [
        "model.layers.16.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.16/input_layernorm/output_3.out4_31",
        "/model/layers.16/input_layernorm/output_0.out4_31"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_16",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/input_layernorm/output_0.out4_31"
      ],
      "const_args": [
        "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.16.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/input_layernorm/output_0.out4_31"
      ],
      "const_args": [
        "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.16.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "67",
            "33"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.16/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48",
        "past_key_values.16.key",
        "past_key_values.16.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16",
        "present.16.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "64",
            "32",
            "2",
            "0",
            "65",
            "33",
            "6",
            "0",
            "66",
            "32"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.16.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16"
      ],
      "const_args": [
        "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_32",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.16/input_layernorm/output_3.out4_31",
        "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49"
      ],
      "const_args": [
        "model.layers.16.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.16/post_attention_layernorm/output_3.out4_32",
        "/model/layers.16/post_attention_layernorm/output_0.out4_32"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_16",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.16/post_attention_layernorm/output_0.out4_32"
      ],
      "const_args": [
        "model.layers.16.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.16/mlp/Mul/output_0.out3_16"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.16.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/mlp/Mul/output_0.out3_16"
      ],
      "const_args": [
        "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_33",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.16/post_attention_layernorm/output_3.out4_32",
        "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50"
      ],
      "const_args": [
        "model.layers.17.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.17/input_layernorm/output_3.out4_33",
        "/model/layers.17/input_layernorm/output_0.out4_33"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_17",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/input_layernorm/output_0.out4_33"
      ],
      "const_args": [
        "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.17.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/input_layernorm/output_0.out4_33"
      ],
      "const_args": [
        "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.17.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "71",
            "35"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.17/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51",
        "past_key_values.17.key",
        "past_key_values.17.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17",
        "present.17.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "68",
            "34",
            "2",
            "0",
            "69",
            "35",
            "6",
            "0",
            "70",
            "34"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.17.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17"
      ],
      "const_args": [
        "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_34",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.17/input_layernorm/output_3.out4_33",
        "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52"
      ],
      "const_args": [
        "model.layers.17.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.17/post_attention_layernorm/output_3.out4_34",
        "/model/layers.17/post_attention_layernorm/output_0.out4_34"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_17",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.17/post_attention_layernorm/output_0.out4_34"
      ],
      "const_args": [
        "model.layers.17.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.17/mlp/Mul/output_0.out3_17"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.17.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/mlp/Mul/output_0.out3_17"
      ],
      "const_args": [
        "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_35",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.17/post_attention_layernorm/output_3.out4_34",
        "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53"
      ],
      "const_args": [
        "model.layers.18.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.18/input_layernorm/output_3.out4_35",
        "/model/layers.18/input_layernorm/output_0.out4_35"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_18",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/input_layernorm/output_0.out4_35"
      ],
      "const_args": [
        "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.18.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/input_layernorm/output_0.out4_35"
      ],
      "const_args": [
        "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.18.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "75",
            "37"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.18/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54",
        "past_key_values.18.key",
        "past_key_values.18.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18",
        "present.18.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "72",
            "36",
            "2",
            "0",
            "73",
            "37",
            "6",
            "0",
            "74",
            "36"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.18.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18"
      ],
      "const_args": [
        "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_36",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.18/input_layernorm/output_3.out4_35",
        "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55"
      ],
      "const_args": [
        "model.layers.18.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.18/post_attention_layernorm/output_3.out4_36",
        "/model/layers.18/post_attention_layernorm/output_0.out4_36"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_18",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.18/post_attention_layernorm/output_0.out4_36"
      ],
      "const_args": [
        "model.layers.18.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.18/mlp/Mul/output_0.out3_18"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.18.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/mlp/Mul/output_0.out3_18"
      ],
      "const_args": [
        "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_37",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.18/post_attention_layernorm/output_3.out4_36",
        "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56"
      ],
      "const_args": [
        "model.layers.19.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.19/input_layernorm/output_3.out4_37",
        "/model/layers.19/input_layernorm/output_0.out4_37"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_19",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/input_layernorm/output_0.out4_37"
      ],
      "const_args": [
        "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.19.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/input_layernorm/output_0.out4_37"
      ],
      "const_args": [
        "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.19.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "79",
            "39"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.19/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57",
        "past_key_values.19.key",
        "past_key_values.19.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19",
        "present.19.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "76",
            "38",
            "2",
            "0",
            "77",
            "39",
            "6",
            "0",
            "78",
            "38"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.19.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19"
      ],
      "const_args": [
        "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_38",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.19/input_layernorm/output_3.out4_37",
        "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58"
      ],
      "const_args": [
        "model.layers.19.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.19/post_attention_layernorm/output_3.out4_38",
        "/model/layers.19/post_attention_layernorm/output_0.out4_38"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_19",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.19/post_attention_layernorm/output_0.out4_38"
      ],
      "const_args": [
        "model.layers.19.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.19/mlp/Mul/output_0.out3_19"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.19.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/mlp/Mul/output_0.out3_19"
      ],
      "const_args": [
        "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_39",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.19/post_attention_layernorm/output_3.out4_38",
        "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59"
      ],
      "const_args": [
        "model.layers.20.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.20/input_layernorm/output_3.out4_39",
        "/model/layers.20/input_layernorm/output_0.out4_39"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_20",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/input_layernorm/output_0.out4_39"
      ],
      "const_args": [
        "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.20.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/input_layernorm/output_0.out4_39"
      ],
      "const_args": [
        "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.20.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "83",
            "41"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.20/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60",
        "past_key_values.20.key",
        "past_key_values.20.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20",
        "present.20.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "80",
            "40",
            "2",
            "0",
            "81",
            "41",
            "6",
            "0",
            "82",
            "40"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.20.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20"
      ],
      "const_args": [
        "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_40",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.20/input_layernorm/output_3.out4_39",
        "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61"
      ],
      "const_args": [
        "model.layers.20.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.20/post_attention_layernorm/output_3.out4_40",
        "/model/layers.20/post_attention_layernorm/output_0.out4_40"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_20",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.20/post_attention_layernorm/output_0.out4_40"
      ],
      "const_args": [
        "model.layers.20.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.20/mlp/Mul/output_0.out3_20"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.20.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/mlp/Mul/output_0.out3_20"
      ],
      "const_args": [
        "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_41",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.20/post_attention_layernorm/output_3.out4_40",
        "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62"
      ],
      "const_args": [
        "model.layers.21.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.21/input_layernorm/output_3.out4_41",
        "/model/layers.21/input_layernorm/output_0.out4_41"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_21",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/input_layernorm/output_0.out4_41"
      ],
      "const_args": [
        "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.21.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/input_layernorm/output_0.out4_41"
      ],
      "const_args": [
        "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.21.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "87",
            "43"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.21/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63",
        "past_key_values.21.key",
        "past_key_values.21.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21",
        "present.21.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "84",
            "42",
            "2",
            "0",
            "85",
            "43",
            "6",
            "0",
            "86",
            "42"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.21.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21"
      ],
      "const_args": [
        "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_42",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.21/input_layernorm/output_3.out4_41",
        "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64"
      ],
      "const_args": [
        "model.layers.21.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.21/post_attention_layernorm/output_3.out4_42",
        "/model/layers.21/post_attention_layernorm/output_0.out4_42"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_21",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.21/post_attention_layernorm/output_0.out4_42"
      ],
      "const_args": [
        "model.layers.21.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.21/mlp/Mul/output_0.out3_21"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.21.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/mlp/Mul/output_0.out3_21"
      ],
      "const_args": [
        "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_43",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.21/post_attention_layernorm/output_3.out4_42",
        "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65"
      ],
      "const_args": [
        "model.layers.22.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.22/input_layernorm/output_3.out4_43",
        "/model/layers.22/input_layernorm/output_0.out4_43"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_22",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/input_layernorm/output_0.out4_43"
      ],
      "const_args": [
        "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.22.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/input_layernorm/output_0.out4_43"
      ],
      "const_args": [
        "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.22.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "91",
            "45"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.22/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66",
        "past_key_values.22.key",
        "past_key_values.22.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22",
        "present.22.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "88",
            "44",
            "2",
            "0",
            "89",
            "45",
            "6",
            "0",
            "90",
            "44"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.22.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22"
      ],
      "const_args": [
        "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_44",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.22/input_layernorm/output_3.out4_43",
        "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67"
      ],
      "const_args": [
        "model.layers.22.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.22/post_attention_layernorm/output_3.out4_44",
        "/model/layers.22/post_attention_layernorm/output_0.out4_44"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_22",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.22/post_attention_layernorm/output_0.out4_44"
      ],
      "const_args": [
        "model.layers.22.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.22/mlp/Mul/output_0.out3_22"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.22.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/mlp/Mul/output_0.out3_22"
      ],
      "const_args": [
        "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_45",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.22/post_attention_layernorm/output_3.out4_44",
        "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68"
      ],
      "const_args": [
        "model.layers.23.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.23/input_layernorm/output_3.out4_45",
        "/model/layers.23/input_layernorm/output_0.out4_45"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_23",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/input_layernorm/output_0.out4_45"
      ],
      "const_args": [
        "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.23.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/input_layernorm/output_0.out4_45"
      ],
      "const_args": [
        "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.23.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "95",
            "47"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.23/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69",
        "past_key_values.23.key",
        "past_key_values.23.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23",
        "present.23.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "92",
            "46",
            "2",
            "0",
            "93",
            "47",
            "6",
            "0",
            "94",
            "46"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.23.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23"
      ],
      "const_args": [
        "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_46",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.23/input_layernorm/output_3.out4_45",
        "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70"
      ],
      "const_args": [
        "model.layers.23.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.23/post_attention_layernorm/output_3.out4_46",
        "/model/layers.23/post_attention_layernorm/output_0.out4_46"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_23",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.23/post_attention_layernorm/output_0.out4_46"
      ],
      "const_args": [
        "model.layers.23.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.23/mlp/Mul/output_0.out3_23"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.23.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/mlp/Mul/output_0.out3_23"
      ],
      "const_args": [
        "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_47",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.23/post_attention_layernorm/output_3.out4_46",
        "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71"
      ],
      "const_args": [
        "model.layers.24.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.24/input_layernorm/output_3.out4_47",
        "/model/layers.24/input_layernorm/output_0.out4_47"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_24",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/input_layernorm/output_0.out4_47"
      ],
      "const_args": [
        "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.24.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/input_layernorm/output_0.out4_47"
      ],
      "const_args": [
        "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.24.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "99",
            "49"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.24/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72",
        "past_key_values.24.key",
        "past_key_values.24.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24",
        "present.24.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "96",
            "48",
            "2",
            "0",
            "97",
            "49",
            "6",
            "0",
            "98",
            "48"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.24.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24"
      ],
      "const_args": [
        "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_48",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.24/input_layernorm/output_3.out4_47",
        "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73"
      ],
      "const_args": [
        "model.layers.24.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.24/post_attention_layernorm/output_3.out4_48",
        "/model/layers.24/post_attention_layernorm/output_0.out4_48"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_24",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.24/post_attention_layernorm/output_0.out4_48"
      ],
      "const_args": [
        "model.layers.24.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.24/mlp/Mul/output_0.out3_24"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.24.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/mlp/Mul/output_0.out3_24"
      ],
      "const_args": [
        "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_49",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.24/post_attention_layernorm/output_3.out4_48",
        "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74"
      ],
      "const_args": [
        "model.layers.25.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.25/input_layernorm/output_3.out4_49",
        "/model/layers.25/input_layernorm/output_0.out4_49"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_25",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/input_layernorm/output_0.out4_49"
      ],
      "const_args": [
        "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.25.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/input_layernorm/output_0.out4_49"
      ],
      "const_args": [
        "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.25.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "103",
            "51"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.25/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75",
        "past_key_values.25.key",
        "past_key_values.25.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25",
        "present.25.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "100",
            "50",
            "2",
            "0",
            "101",
            "51",
            "6",
            "0",
            "102",
            "50"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.25.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25"
      ],
      "const_args": [
        "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_50",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.25/input_layernorm/output_3.out4_49",
        "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76"
      ],
      "const_args": [
        "model.layers.25.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.25/post_attention_layernorm/output_3.out4_50",
        "/model/layers.25/post_attention_layernorm/output_0.out4_50"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_25",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.25/post_attention_layernorm/output_0.out4_50"
      ],
      "const_args": [
        "model.layers.25.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.25/mlp/Mul/output_0.out3_25"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.25.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/mlp/Mul/output_0.out3_25"
      ],
      "const_args": [
        "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_51",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.25/post_attention_layernorm/output_3.out4_50",
        "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77"
      ],
      "const_args": [
        "model.layers.26.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.26/input_layernorm/output_3.out4_51",
        "/model/layers.26/input_layernorm/output_0.out4_51"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_26",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/input_layernorm/output_0.out4_51"
      ],
      "const_args": [
        "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.26.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/input_layernorm/output_0.out4_51"
      ],
      "const_args": [
        "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.26.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "107",
            "53"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.26/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78",
        "past_key_values.26.key",
        "past_key_values.26.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26",
        "present.26.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "104",
            "52",
            "2",
            "0",
            "105",
            "53",
            "6",
            "0",
            "106",
            "52"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.26.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26"
      ],
      "const_args": [
        "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_52",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.26/input_layernorm/output_3.out4_51",
        "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79"
      ],
      "const_args": [
        "model.layers.26.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.26/post_attention_layernorm/output_3.out4_52",
        "/model/layers.26/post_attention_layernorm/output_0.out4_52"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_26",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.26/post_attention_layernorm/output_0.out4_52"
      ],
      "const_args": [
        "model.layers.26.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.26/mlp/Mul/output_0.out3_26"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.26.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/mlp/Mul/output_0.out3_26"
      ],
      "const_args": [
        "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_53",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.26/post_attention_layernorm/output_3.out4_52",
        "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80"
      ],
      "const_args": [
        "model.layers.27.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.27/input_layernorm/output_3.out4_53",
        "/model/layers.27/input_layernorm/output_0.out4_53"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_27",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/input_layernorm/output_0.out4_53"
      ],
      "const_args": [
        "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.27.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/input_layernorm/output_0.out4_53"
      ],
      "const_args": [
        "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.27.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "111",
            "55"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.27/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81",
        "past_key_values.27.key",
        "past_key_values.27.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27",
        "present.27.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "108",
            "54",
            "2",
            "0",
            "109",
            "55",
            "6",
            "0",
            "110",
            "54"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.27.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27"
      ],
      "const_args": [
        "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_54",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.27/input_layernorm/output_3.out4_53",
        "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82"
      ],
      "const_args": [
        "model.layers.27.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.27/post_attention_layernorm/output_3.out4_54",
        "/model/layers.27/post_attention_layernorm/output_0.out4_54"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_27",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.27/post_attention_layernorm/output_0.out4_54"
      ],
      "const_args": [
        "model.layers.27.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.27/mlp/Mul/output_0.out3_27"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.27.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/mlp/Mul/output_0.out3_27"
      ],
      "const_args": [
        "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_55",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.27/post_attention_layernorm/output_3.out4_54",
        "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83"
      ],
      "const_args": [
        "model.layers.28.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.28/input_layernorm/output_3.out4_55",
        "/model/layers.28/input_layernorm/output_0.out4_55"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_28",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.28/input_layernorm/output_0.out4_55"
      ],
      "const_args": [
        "model.layers.28.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.28.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.28.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.28.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.28/input_layernorm/output_0.out4_55"
      ],
      "const_args": [
        "model.layers.28.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.28.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "115",
            "57"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.28/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84",
        "past_key_values.28.key",
        "past_key_values.28.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28",
        "present.28.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "112",
            "56",
            "2",
            "0",
            "113",
            "57",
            "6",
            "0",
            "114",
            "56"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.28.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28"
      ],
      "const_args": [
        "model.layers.28.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_56",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.28/input_layernorm/output_3.out4_55",
        "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85"
      ],
      "const_args": [
        "model.layers.28.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.28/post_attention_layernorm/output_3.out4_56",
        "/model/layers.28/post_attention_layernorm/output_0.out4_56"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_28",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.28/post_attention_layernorm/output_0.out4_56"
      ],
      "const_args": [
        "model.layers.28.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.28.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.28.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.28.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.28.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.28.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.28.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.28.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.28/mlp/Mul/output_0.out3_28"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.28.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.28/mlp/Mul/output_0.out3_28"
      ],
      "const_args": [
        "model.layers.28.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.28.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.28.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_57",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.28/post_attention_layernorm/output_3.out4_56",
        "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86"
      ],
      "const_args": [
        "model.layers.29.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.29/input_layernorm/output_3.out4_57",
        "/model/layers.29/input_layernorm/output_0.out4_57"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_29",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.29/input_layernorm/output_0.out4_57"
      ],
      "const_args": [
        "model.layers.29.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.29.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.29.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.29.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.29/input_layernorm/output_0.out4_57"
      ],
      "const_args": [
        "model.layers.29.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.29.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "119",
            "59"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.29/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87",
        "past_key_values.29.key",
        "past_key_values.29.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29",
        "present.29.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "116",
            "58",
            "2",
            "0",
            "117",
            "59",
            "6",
            "0",
            "118",
            "58"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.29.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29"
      ],
      "const_args": [
        "model.layers.29.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_58",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.29/input_layernorm/output_3.out4_57",
        "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88"
      ],
      "const_args": [
        "model.layers.29.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.29/post_attention_layernorm/output_3.out4_58",
        "/model/layers.29/post_attention_layernorm/output_0.out4_58"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_29",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.29/post_attention_layernorm/output_0.out4_58"
      ],
      "const_args": [
        "model.layers.29.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.29.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.29.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.29.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.29.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.29.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.29.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.29.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.29/mlp/Mul/output_0.out3_29"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.29.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.29/mlp/Mul/output_0.out3_29"
      ],
      "const_args": [
        "model.layers.29.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.29.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.29.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_59",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.29/post_attention_layernorm/output_3.out4_58",
        "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89"
      ],
      "const_args": [
        "model.layers.30.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.30/input_layernorm/output_3.out4_59",
        "/model/layers.30/input_layernorm/output_0.out4_59"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_30",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.30/input_layernorm/output_0.out4_59"
      ],
      "const_args": [
        "model.layers.30.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.30.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.30.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.30.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.30/input_layernorm/output_0.out4_59"
      ],
      "const_args": [
        "model.layers.30.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.30.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "123",
            "61"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.30/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90",
        "past_key_values.30.key",
        "past_key_values.30.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30",
        "present.30.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "120",
            "60",
            "2",
            "0",
            "121",
            "61",
            "6",
            "0",
            "122",
            "60"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.30.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30"
      ],
      "const_args": [
        "model.layers.30.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_60",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.30/input_layernorm/output_3.out4_59",
        "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91"
      ],
      "const_args": [
        "model.layers.30.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.30/post_attention_layernorm/output_3.out4_60",
        "/model/layers.30/post_attention_layernorm/output_0.out4_60"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_30",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.30/post_attention_layernorm/output_0.out4_60"
      ],
      "const_args": [
        "model.layers.30.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.30.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.30.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.30.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.30.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.30.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.30.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.30.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.30/mlp/Mul/output_0.out3_30"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.30.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.30/mlp/Mul/output_0.out3_30"
      ],
      "const_args": [
        "model.layers.30.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.30.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.30.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_61",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.30/post_attention_layernorm/output_3.out4_60",
        "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92"
      ],
      "const_args": [
        "model.layers.31.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.31/input_layernorm/output_3.out4_61",
        "/model/layers.31/input_layernorm/output_0.out4_61"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_31",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.31/input_layernorm/output_0.out4_61"
      ],
      "const_args": [
        "model.layers.31.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.31.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.31.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "6144"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.31.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.31/input_layernorm/output_0.out4_61"
      ],
      "const_args": [
        "model.layers.31.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.31.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "127",
            "63"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "/model/layers.31/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93",
        "past_key_values.31.key",
        "past_key_values.31.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31",
        "present.31.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "32"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.10206207633018494"
          ]
        },
        "local_window_size": {
          "type": "int",
          "value": [
            "262144"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "32",
            "32",
            "1",
            "4096",
            "96"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "124",
            "62",
            "2",
            "0",
            "125",
            "63",
            "6",
            "0",
            "126",
            "62"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "192",
            "6",
            "0",
            "0",
            "192"
          ]
        }
      }
    },
    {
      "name": "layers.31.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31"
      ],
      "const_args": [
        "model.layers.31.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_62",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.31/input_layernorm/output_3.out4_61",
        "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94"
      ],
      "const_args": [
        "model.layers.31.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.31/post_attention_layernorm/output_3.out4_62",
        "/model/layers.31/post_attention_layernorm/output_0.out4_62"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_31",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.31/post_attention_layernorm/output_0.out4_62"
      ],
      "const_args": [
        "model.layers.31.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.31.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.31.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.31.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.31.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.31.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.31.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.31.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.31/mlp/Mul/output_0.out3_31"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "3072",
            "8192"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.31.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.31/mlp/Mul/output_0.out3_31"
      ],
      "const_args": [
        "model.layers.31.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.31.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.31.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8192"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_63",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.31/post_attention_layernorm/output_3.out4_62",
        "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95"
      ],
      "const_args": [
        "model.layers.32.final_norm_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.32/final_norm_layernorm/output_0.dummy",
        "/model/layers.32/final_norm_layernorm/output_0.out4_63"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "3072"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "/lm_head/MatMulNBits",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.32/final_norm_layernorm/output_0.out4_63"
      ],
      "const_args": [
        "lm_head.MatMulNBits.qweight.preformat",
        "lm_head.MatMulNBits.bias.preformat",
        "lm_head.MatMulNBits.scales.preformat",
        "lm_head.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "logits.out5_4_96"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "3072"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "32064"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    }
  ],
  "fused_tensors": {
    "in": {
      "buffer_size": 18560,
      "xrt_arg_id": 0,
      "packed_tensors": [
        "/model/layers.0/input_layernorm/output_0.out5_4_0",
        "attention_mask_const_uint",
        "/model/embed_tokens/Gather/output_0.out4_0"
      ]
    },
    "out": {
      "buffer_size": 70272,
      "xrt_arg_id": 1,
      "packed_tensors": [
        "/model/layers.32/final_norm_layernorm/output_0.dummy",
        "logits.out5_4_96"
      ]
    },
    "scratch": {
      "buffer_size": 2287616,
      "xrt_arg_id": 2,
      "packed_tensors": [
        "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0",
        "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0",
        "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1",
        "/model/layers.0/post_attention_layernorm/output_3.out4_0",
        "/model/layers.0/post_attention_layernorm/output_0.out4_0",
        "/model/layers.0/mlp/Mul/output_0.out3_0",
        "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2",
        "/model/layers.1/input_layernorm/output_3.out4_1",
        "/model/layers.1/input_layernorm/output_0.out4_1",
        "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3",
        "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1",
        "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4",
        "/model/layers.1/post_attention_layernorm/output_3.out4_2",
        "/model/layers.1/post_attention_layernorm/output_0.out4_2",
        "/model/layers.1/mlp/Mul/output_0.out3_1",
        "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5",
        "/model/layers.2/input_layernorm/output_3.out4_3",
        "/model/layers.2/input_layernorm/output_0.out4_3",
        "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6",
        "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2",
        "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7",
        "/model/layers.2/post_attention_layernorm/output_3.out4_4",
        "/model/layers.2/post_attention_layernorm/output_0.out4_4",
        "/model/layers.2/mlp/Mul/output_0.out3_2",
        "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8",
        "/model/layers.3/input_layernorm/output_3.out4_5",
        "/model/layers.3/input_layernorm/output_0.out4_5",
        "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9",
        "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3",
        "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10",
        "/model/layers.3/post_attention_layernorm/output_3.out4_6",
        "/model/layers.3/post_attention_layernorm/output_0.out4_6",
        "/model/layers.3/mlp/Mul/output_0.out3_3",
        "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11",
        "/model/layers.4/input_layernorm/output_3.out4_7",
        "/model/layers.4/input_layernorm/output_0.out4_7",
        "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12",
        "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4",
        "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13",
        "/model/layers.4/post_attention_layernorm/output_3.out4_8",
        "/model/layers.4/post_attention_layernorm/output_0.out4_8",
        "/model/layers.4/mlp/Mul/output_0.out3_4",
        "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14",
        "/model/layers.5/input_layernorm/output_3.out4_9",
        "/model/layers.5/input_layernorm/output_0.out4_9",
        "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15",
        "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5",
        "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16",
        "/model/layers.5/post_attention_layernorm/output_3.out4_10",
        "/model/layers.5/post_attention_layernorm/output_0.out4_10",
        "/model/layers.5/mlp/Mul/output_0.out3_5",
        "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17",
        "/model/layers.6/input_layernorm/output_3.out4_11",
        "/model/layers.6/input_layernorm/output_0.out4_11",
        "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18",
        "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6",
        "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19",
        "/model/layers.6/post_attention_layernorm/output_3.out4_12",
        "/model/layers.6/post_attention_layernorm/output_0.out4_12",
        "/model/layers.6/mlp/Mul/output_0.out3_6",
        "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20",
        "/model/layers.7/input_layernorm/output_3.out4_13",
        "/model/layers.7/input_layernorm/output_0.out4_13",
        "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21",
        "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7",
        "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22",
        "/model/layers.7/post_attention_layernorm/output_3.out4_14",
        "/model/layers.7/post_attention_layernorm/output_0.out4_14",
        "/model/layers.7/mlp/Mul/output_0.out3_7",
        "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23",
        "/model/layers.8/input_layernorm/output_3.out4_15",
        "/model/layers.8/input_layernorm/output_0.out4_15",
        "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24",
        "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8",
        "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25",
        "/model/layers.8/post_attention_layernorm/output_3.out4_16",
        "/model/layers.8/post_attention_layernorm/output_0.out4_16",
        "/model/layers.8/mlp/Mul/output_0.out3_8",
        "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26",
        "/model/layers.9/input_layernorm/output_3.out4_17",
        "/model/layers.9/input_layernorm/output_0.out4_17",
        "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27",
        "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9",
        "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28",
        "/model/layers.9/post_attention_layernorm/output_3.out4_18",
        "/model/layers.9/post_attention_layernorm/output_0.out4_18",
        "/model/layers.9/mlp/Mul/output_0.out3_9",
        "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29",
        "/model/layers.10/input_layernorm/output_3.out4_19",
        "/model/layers.10/input_layernorm/output_0.out4_19",
        "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30",
        "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10",
        "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31",
        "/model/layers.10/post_attention_layernorm/output_3.out4_20",
        "/model/layers.10/post_attention_layernorm/output_0.out4_20",
        "/model/layers.10/mlp/Mul/output_0.out3_10",
        "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32",
        "/model/layers.11/input_layernorm/output_3.out4_21",
        "/model/layers.11/input_layernorm/output_0.out4_21",
        "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33",
        "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11",
        "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34",
        "/model/layers.11/post_attention_layernorm/output_3.out4_22",
        "/model/layers.11/post_attention_layernorm/output_0.out4_22",
        "/model/layers.11/mlp/Mul/output_0.out3_11",
        "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35",
        "/model/layers.12/input_layernorm/output_3.out4_23",
        "/model/layers.12/input_layernorm/output_0.out4_23",
        "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36",
        "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12",
        "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37",
        "/model/layers.12/post_attention_layernorm/output_3.out4_24",
        "/model/layers.12/post_attention_layernorm/output_0.out4_24",
        "/model/layers.12/mlp/Mul/output_0.out3_12",
        "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38",
        "/model/layers.13/input_layernorm/output_3.out4_25",
        "/model/layers.13/input_layernorm/output_0.out4_25",
        "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39",
        "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13",
        "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40",
        "/model/layers.13/post_attention_layernorm/output_3.out4_26",
        "/model/layers.13/post_attention_layernorm/output_0.out4_26",
        "/model/layers.13/mlp/Mul/output_0.out3_13",
        "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41",
        "/model/layers.14/input_layernorm/output_3.out4_27",
        "/model/layers.14/input_layernorm/output_0.out4_27",
        "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42",
        "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14",
        "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43",
        "/model/layers.14/post_attention_layernorm/output_3.out4_28",
        "/model/layers.14/post_attention_layernorm/output_0.out4_28",
        "/model/layers.14/mlp/Mul/output_0.out3_14",
        "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44",
        "/model/layers.15/input_layernorm/output_3.out4_29",
        "/model/layers.15/input_layernorm/output_0.out4_29",
        "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45",
        "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15",
        "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46",
        "/model/layers.15/post_attention_layernorm/output_3.out4_30",
        "/model/layers.15/post_attention_layernorm/output_0.out4_30",
        "/model/layers.15/mlp/Mul/output_0.out3_15",
        "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47",
        "/model/layers.16/input_layernorm/output_3.out4_31",
        "/model/layers.16/input_layernorm/output_0.out4_31",
        "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48",
        "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16",
        "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49",
        "/model/layers.16/post_attention_layernorm/output_3.out4_32",
        "/model/layers.16/post_attention_layernorm/output_0.out4_32",
        "/model/layers.16/mlp/Mul/output_0.out3_16",
        "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50",
        "/model/layers.17/input_layernorm/output_3.out4_33",
        "/model/layers.17/input_layernorm/output_0.out4_33",
        "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51",
        "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17",
        "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52",
        "/model/layers.17/post_attention_layernorm/output_3.out4_34",
        "/model/layers.17/post_attention_layernorm/output_0.out4_34",
        "/model/layers.17/mlp/Mul/output_0.out3_17",
        "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53",
        "/model/layers.18/input_layernorm/output_3.out4_35",
        "/model/layers.18/input_layernorm/output_0.out4_35",
        "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54",
        "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18",
        "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55",
        "/model/layers.18/post_attention_layernorm/output_3.out4_36",
        "/model/layers.18/post_attention_layernorm/output_0.out4_36",
        "/model/layers.18/mlp/Mul/output_0.out3_18",
        "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56",
        "/model/layers.19/input_layernorm/output_3.out4_37",
        "/model/layers.19/input_layernorm/output_0.out4_37",
        "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57",
        "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19",
        "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58",
        "/model/layers.19/post_attention_layernorm/output_3.out4_38",
        "/model/layers.19/post_attention_layernorm/output_0.out4_38",
        "/model/layers.19/mlp/Mul/output_0.out3_19",
        "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59",
        "/model/layers.20/input_layernorm/output_3.out4_39",
        "/model/layers.20/input_layernorm/output_0.out4_39",
        "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60",
        "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20",
        "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61",
        "/model/layers.20/post_attention_layernorm/output_3.out4_40",
        "/model/layers.20/post_attention_layernorm/output_0.out4_40",
        "/model/layers.20/mlp/Mul/output_0.out3_20",
        "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62",
        "/model/layers.21/input_layernorm/output_3.out4_41",
        "/model/layers.21/input_layernorm/output_0.out4_41",
        "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63",
        "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21",
        "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64",
        "/model/layers.21/post_attention_layernorm/output_3.out4_42",
        "/model/layers.21/post_attention_layernorm/output_0.out4_42",
        "/model/layers.21/mlp/Mul/output_0.out3_21",
        "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65",
        "/model/layers.22/input_layernorm/output_3.out4_43",
        "/model/layers.22/input_layernorm/output_0.out4_43",
        "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66",
        "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22",
        "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67",
        "/model/layers.22/post_attention_layernorm/output_3.out4_44",
        "/model/layers.22/post_attention_layernorm/output_0.out4_44",
        "/model/layers.22/mlp/Mul/output_0.out3_22",
        "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68",
        "/model/layers.23/input_layernorm/output_3.out4_45",
        "/model/layers.23/input_layernorm/output_0.out4_45",
        "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69",
        "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23",
        "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70",
        "/model/layers.23/post_attention_layernorm/output_3.out4_46",
        "/model/layers.23/post_attention_layernorm/output_0.out4_46",
        "/model/layers.23/mlp/Mul/output_0.out3_23",
        "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71",
        "/model/layers.24/input_layernorm/output_3.out4_47",
        "/model/layers.24/input_layernorm/output_0.out4_47",
        "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72",
        "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24",
        "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73",
        "/model/layers.24/post_attention_layernorm/output_3.out4_48",
        "/model/layers.24/post_attention_layernorm/output_0.out4_48",
        "/model/layers.24/mlp/Mul/output_0.out3_24",
        "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74",
        "/model/layers.25/input_layernorm/output_3.out4_49",
        "/model/layers.25/input_layernorm/output_0.out4_49",
        "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75",
        "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25",
        "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76",
        "/model/layers.25/post_attention_layernorm/output_3.out4_50",
        "/model/layers.25/post_attention_layernorm/output_0.out4_50",
        "/model/layers.25/mlp/Mul/output_0.out3_25",
        "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77",
        "/model/layers.26/input_layernorm/output_3.out4_51",
        "/model/layers.26/input_layernorm/output_0.out4_51",
        "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78",
        "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26",
        "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79",
        "/model/layers.26/post_attention_layernorm/output_3.out4_52",
        "/model/layers.26/post_attention_layernorm/output_0.out4_52",
        "/model/layers.26/mlp/Mul/output_0.out3_26",
        "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80",
        "/model/layers.27/input_layernorm/output_3.out4_53",
        "/model/layers.27/input_layernorm/output_0.out4_53",
        "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81",
        "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27",
        "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82",
        "/model/layers.27/post_attention_layernorm/output_3.out4_54",
        "/model/layers.27/post_attention_layernorm/output_0.out4_54",
        "/model/layers.27/mlp/Mul/output_0.out3_27",
        "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83",
        "/model/layers.28/input_layernorm/output_3.out4_55",
        "/model/layers.28/input_layernorm/output_0.out4_55",
        "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84",
        "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28",
        "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85",
        "/model/layers.28/post_attention_layernorm/output_3.out4_56",
        "/model/layers.28/post_attention_layernorm/output_0.out4_56",
        "/model/layers.28/mlp/Mul/output_0.out3_28",
        "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86",
        "/model/layers.29/input_layernorm/output_3.out4_57",
        "/model/layers.29/input_layernorm/output_0.out4_57",
        "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87",
        "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29",
        "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88",
        "/model/layers.29/post_attention_layernorm/output_3.out4_58",
        "/model/layers.29/post_attention_layernorm/output_0.out4_58",
        "/model/layers.29/mlp/Mul/output_0.out3_29",
        "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89",
        "/model/layers.30/input_layernorm/output_3.out4_59",
        "/model/layers.30/input_layernorm/output_0.out4_59",
        "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90",
        "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30",
        "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91",
        "/model/layers.30/post_attention_layernorm/output_3.out4_60",
        "/model/layers.30/post_attention_layernorm/output_0.out4_60",
        "/model/layers.30/mlp/Mul/output_0.out3_30",
        "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92",
        "/model/layers.31/input_layernorm/output_3.out4_61",
        "/model/layers.31/input_layernorm/output_0.out4_61",
        "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93",
        "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31",
        "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94",
        "/model/layers.31/post_attention_layernorm/output_3.out4_62",
        "/model/layers.31/post_attention_layernorm/output_0.out4_62",
        "/model/layers.31/mlp/Mul/output_0.out3_31",
        "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95",
        "/model/layers.32/final_norm_layernorm/output_0.out4_63"
      ]
    },
    "const": {
      "buffer_size": 3060771584,
      "xrt_arg_id": 3,
      "packed_tensors": [
        "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.0.post_attention_layernorm.weight.bf",
        "model.layers.0.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.input_layernorm.weight.bf",
        "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.post_attention_layernorm.weight.bf",
        "model.layers.1.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.input_layernorm.weight.bf",
        "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.post_attention_layernorm.weight.bf",
        "model.layers.2.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.input_layernorm.weight.bf",
        "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.post_attention_layernorm.weight.bf",
        "model.layers.3.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.input_layernorm.weight.bf",
        "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.post_attention_layernorm.weight.bf",
        "model.layers.4.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.input_layernorm.weight.bf",
        "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.post_attention_layernorm.weight.bf",
        "model.layers.5.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.input_layernorm.weight.bf",
        "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.post_attention_layernorm.weight.bf",
        "model.layers.6.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.input_layernorm.weight.bf",
        "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.post_attention_layernorm.weight.bf",
        "model.layers.7.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.input_layernorm.weight.bf",
        "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.post_attention_layernorm.weight.bf",
        "model.layers.8.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.input_layernorm.weight.bf",
        "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.post_attention_layernorm.weight.bf",
        "model.layers.9.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.input_layernorm.weight.bf",
        "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.post_attention_layernorm.weight.bf",
        "model.layers.10.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.input_layernorm.weight.bf",
        "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.post_attention_layernorm.weight.bf",
        "model.layers.11.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.input_layernorm.weight.bf",
        "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.post_attention_layernorm.weight.bf",
        "model.layers.12.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.input_layernorm.weight.bf",
        "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.post_attention_layernorm.weight.bf",
        "model.layers.13.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.input_layernorm.weight.bf",
        "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.post_attention_layernorm.weight.bf",
        "model.layers.14.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.input_layernorm.weight.bf",
        "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.post_attention_layernorm.weight.bf",
        "model.layers.15.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.input_layernorm.weight.bf",
        "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.post_attention_layernorm.weight.bf",
        "model.layers.16.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.input_layernorm.weight.bf",
        "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.post_attention_layernorm.weight.bf",
        "model.layers.17.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.input_layernorm.weight.bf",
        "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.post_attention_layernorm.weight.bf",
        "model.layers.18.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.input_layernorm.weight.bf",
        "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.post_attention_layernorm.weight.bf",
        "model.layers.19.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.input_layernorm.weight.bf",
        "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.post_attention_layernorm.weight.bf",
        "model.layers.20.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.input_layernorm.weight.bf",
        "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.post_attention_layernorm.weight.bf",
        "model.layers.21.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.input_layernorm.weight.bf",
        "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.post_attention_layernorm.weight.bf",
        "model.layers.22.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.input_layernorm.weight.bf",
        "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.post_attention_layernorm.weight.bf",
        "model.layers.23.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.input_layernorm.weight.bf",
        "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.post_attention_layernorm.weight.bf",
        "model.layers.24.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.input_layernorm.weight.bf",
        "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.post_attention_layernorm.weight.bf",
        "model.layers.25.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.input_layernorm.weight.bf",
        "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.post_attention_layernorm.weight.bf",
        "model.layers.26.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.input_layernorm.weight.bf",
        "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.post_attention_layernorm.weight.bf",
        "model.layers.27.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.28.input_layernorm.weight.bf",
        "model.layers.28.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.28.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.28.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.28.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.28.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.28.post_attention_layernorm.weight.bf",
        "model.layers.28.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.28.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.28.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.28.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.28.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.28.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.28.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.28.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.28.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.28.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.28.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.28.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.29.input_layernorm.weight.bf",
        "model.layers.29.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.29.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.29.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.29.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.29.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.29.post_attention_layernorm.weight.bf",
        "model.layers.29.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.29.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.29.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.29.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.29.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.29.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.29.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.29.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.29.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.29.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.29.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.29.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.30.input_layernorm.weight.bf",
        "model.layers.30.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.30.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.30.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.30.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.30.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.30.post_attention_layernorm.weight.bf",
        "model.layers.30.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.30.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.30.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.30.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.30.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.30.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.30.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.30.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.30.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.30.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.30.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.30.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.31.input_layernorm.weight.bf",
        "model.layers.31.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.31.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.31.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.bias.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.31.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.31.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.31.post_attention_layernorm.weight.bf",
        "model.layers.31.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.31.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.31.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.31.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.31.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.31.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.31.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.31.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.31.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.31.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.31.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.31.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.32.final_norm_layernorm.weight.bf",
        "lm_head.MatMulNBits.qweight.preformat",
        "lm_head.MatMulNBits.bias.preformat",
        "lm_head.MatMulNBits.scales.preformat",
        "lm_head.MatMulNBits.qzeros.preformat"
      ]
    },
    "super_instr": {
      "buffer_size": 0,
      "xrt_arg_id": 4,
      "packed_tensors": []
    },
    "ext_buf_0": {
      "buffer_size": 1610612736,
      "xrt_arg_id": 5,
      "packed_tensors": [
        "past_key_values.0.key",
        "past_key_values.0.value",
        "present.0.key",
        "present.0.value",
        "past_key_values.1.key",
        "past_key_values.1.value",
        "present.1.key",
        "present.1.value",
        "past_key_values.2.key",
        "past_key_values.2.value",
        "present.2.key",
        "present.2.value",
        "past_key_values.3.key",
        "past_key_values.3.value",
        "present.3.key",
        "present.3.value",
        "past_key_values.4.key",
        "past_key_values.4.value",
        "present.4.key",
        "present.4.value",
        "past_key_values.5.key",
        "past_key_values.5.value",
        "present.5.key",
        "present.5.value",
        "past_key_values.6.key",
        "past_key_values.6.value",
        "present.6.key",
        "present.6.value",
        "past_key_values.7.key",
        "past_key_values.7.value",
        "present.7.key",
        "present.7.value",
        "past_key_values.8.key",
        "past_key_values.8.value",
        "present.8.key",
        "present.8.value",
        "past_key_values.9.key",
        "past_key_values.9.value",
        "present.9.key",
        "present.9.value",
        "past_key_values.10.key",
        "past_key_values.10.value",
        "present.10.key",
        "present.10.value",
        "past_key_values.11.key",
        "past_key_values.11.value",
        "present.11.key",
        "present.11.value",
        "past_key_values.12.key",
        "past_key_values.12.value",
        "present.12.key",
        "present.12.value",
        "past_key_values.13.key",
        "past_key_values.13.value",
        "present.13.key",
        "present.13.value",
        "past_key_values.14.key",
        "past_key_values.14.value",
        "present.14.key",
        "present.14.value",
        "past_key_values.15.key",
        "past_key_values.15.value",
        "present.15.key",
        "present.15.value",
        "past_key_values.16.key",
        "past_key_values.16.value",
        "present.16.key",
        "present.16.value",
        "past_key_values.17.key",
        "past_key_values.17.value",
        "present.17.key",
        "present.17.value",
        "past_key_values.18.key",
        "past_key_values.18.value",
        "present.18.key",
        "present.18.value",
        "past_key_values.19.key",
        "past_key_values.19.value",
        "present.19.key",
        "present.19.value",
        "past_key_values.20.key",
        "past_key_values.20.value",
        "present.20.key",
        "present.20.value",
        "past_key_values.21.key",
        "past_key_values.21.value",
        "present.21.key",
        "present.21.value",
        "past_key_values.22.key",
        "past_key_values.22.value",
        "present.22.key",
        "present.22.value",
        "past_key_values.23.key",
        "past_key_values.23.value",
        "present.23.key",
        "present.23.value",
        "past_key_values.24.key",
        "past_key_values.24.value",
        "present.24.key",
        "present.24.value",
        "past_key_values.25.key",
        "past_key_values.25.value",
        "present.25.key",
        "present.25.value",
        "past_key_values.26.key",
        "past_key_values.26.value",
        "present.26.key",
        "present.26.value",
        "past_key_values.27.key",
        "past_key_values.27.value",
        "present.27.key",
        "present.27.value",
        "past_key_values.28.key",
        "past_key_values.28.value",
        "present.28.key",
        "present.28.value",
        "past_key_values.29.key",
        "past_key_values.29.value",
        "present.29.key",
        "present.29.value",
        "past_key_values.30.key",
        "past_key_values.30.value",
        "present.30.key",
        "present.30.value",
        "past_key_values.31.key",
        "past_key_values.31.value",
        "present.31.key",
        "present.31.value"
      ]
    },
    "ext_buf_1": {
      "buffer_size": 25952256,
      "xrt_arg_id": 6,
      "packed_tensors": [
        "sin_cos_cache_token"
      ]
    }
  },
  "tensor_map": {
    "/model/layers.0/input_layernorm/output_0.out5_4_0": {
      "packed_buffer_label": "in",
      "xrt_arg_id": 0,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 6144
    },
    "attention_mask_const_uint": {
      "packed_buffer_label": "in",
      "xrt_arg_id": 0,
      "dtype": "uint32",
      "shape": [
        1
      ],
      "size_in_bytes": 4,
      "op_tensor_size": 4,
      "offset": 18556
    },
    "/model/embed_tokens/Gather/output_0.out4_0": {
      "packed_buffer_label": "in",
      "xrt_arg_id": 0,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 12292
    },
    "/model/layers.32/final_norm_layernorm/output_0.dummy": {
      "packed_buffer_label": "out",
      "xrt_arg_id": 1,
      "dtype": "float16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 0
    },
    "logits.out5_4_96": {
      "packed_buffer_label": "out",
      "xrt_arg_id": 1,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        32064
      ],
      "size_in_bytes": 64128,
      "op_tensor_size": 64128,
      "offset": 6144
    },
    "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 0
    },
    "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 12288
    },
    "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 18432
    },
    "/model/layers.0/post_attention_layernorm/output_3.out4_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 24576
    },
    "/model/layers.0/post_attention_layernorm/output_0.out4_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 30720
    },
    "/model/layers.0/mlp/Mul/output_0.out3_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 36864
    },
    "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 53248
    },
    "/model/layers.1/input_layernorm/output_3.out4_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 59392
    },
    "/model/layers.1/input_layernorm/output_0.out4_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 65536
    },
    "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 71680
    },
    "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 83968
    },
    "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 90112
    },
    "/model/layers.1/post_attention_layernorm/output_3.out4_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 96256
    },
    "/model/layers.1/post_attention_layernorm/output_0.out4_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 102400
    },
    "/model/layers.1/mlp/Mul/output_0.out3_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 108544
    },
    "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 124928
    },
    "/model/layers.2/input_layernorm/output_3.out4_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 131072
    },
    "/model/layers.2/input_layernorm/output_0.out4_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 137216
    },
    "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 143360
    },
    "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 155648
    },
    "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 161792
    },
    "/model/layers.2/post_attention_layernorm/output_3.out4_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 167936
    },
    "/model/layers.2/post_attention_layernorm/output_0.out4_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 174080
    },
    "/model/layers.2/mlp/Mul/output_0.out3_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 180224
    },
    "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 196608
    },
    "/model/layers.3/input_layernorm/output_3.out4_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 202752
    },
    "/model/layers.3/input_layernorm/output_0.out4_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 208896
    },
    "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 215040
    },
    "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 227328
    },
    "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 233472
    },
    "/model/layers.3/post_attention_layernorm/output_3.out4_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 239616
    },
    "/model/layers.3/post_attention_layernorm/output_0.out4_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 245760
    },
    "/model/layers.3/mlp/Mul/output_0.out3_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 251904
    },
    "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 268288
    },
    "/model/layers.4/input_layernorm/output_3.out4_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 274432
    },
    "/model/layers.4/input_layernorm/output_0.out4_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 280576
    },
    "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 286720
    },
    "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 299008
    },
    "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 305152
    },
    "/model/layers.4/post_attention_layernorm/output_3.out4_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 311296
    },
    "/model/layers.4/post_attention_layernorm/output_0.out4_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 317440
    },
    "/model/layers.4/mlp/Mul/output_0.out3_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 323584
    },
    "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 339968
    },
    "/model/layers.5/input_layernorm/output_3.out4_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 346112
    },
    "/model/layers.5/input_layernorm/output_0.out4_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 352256
    },
    "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 358400
    },
    "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 370688
    },
    "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 376832
    },
    "/model/layers.5/post_attention_layernorm/output_3.out4_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 382976
    },
    "/model/layers.5/post_attention_layernorm/output_0.out4_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 389120
    },
    "/model/layers.5/mlp/Mul/output_0.out3_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 395264
    },
    "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 411648
    },
    "/model/layers.6/input_layernorm/output_3.out4_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 417792
    },
    "/model/layers.6/input_layernorm/output_0.out4_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 423936
    },
    "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 430080
    },
    "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 442368
    },
    "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 448512
    },
    "/model/layers.6/post_attention_layernorm/output_3.out4_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 454656
    },
    "/model/layers.6/post_attention_layernorm/output_0.out4_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 460800
    },
    "/model/layers.6/mlp/Mul/output_0.out3_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 466944
    },
    "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 483328
    },
    "/model/layers.7/input_layernorm/output_3.out4_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 489472
    },
    "/model/layers.7/input_layernorm/output_0.out4_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 495616
    },
    "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 501760
    },
    "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 514048
    },
    "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 520192
    },
    "/model/layers.7/post_attention_layernorm/output_3.out4_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 526336
    },
    "/model/layers.7/post_attention_layernorm/output_0.out4_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 532480
    },
    "/model/layers.7/mlp/Mul/output_0.out3_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 538624
    },
    "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 555008
    },
    "/model/layers.8/input_layernorm/output_3.out4_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 561152
    },
    "/model/layers.8/input_layernorm/output_0.out4_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 567296
    },
    "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 573440
    },
    "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 585728
    },
    "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 591872
    },
    "/model/layers.8/post_attention_layernorm/output_3.out4_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 598016
    },
    "/model/layers.8/post_attention_layernorm/output_0.out4_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 604160
    },
    "/model/layers.8/mlp/Mul/output_0.out3_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 610304
    },
    "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 626688
    },
    "/model/layers.9/input_layernorm/output_3.out4_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 632832
    },
    "/model/layers.9/input_layernorm/output_0.out4_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 638976
    },
    "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 645120
    },
    "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 657408
    },
    "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 663552
    },
    "/model/layers.9/post_attention_layernorm/output_3.out4_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 669696
    },
    "/model/layers.9/post_attention_layernorm/output_0.out4_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 675840
    },
    "/model/layers.9/mlp/Mul/output_0.out3_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 681984
    },
    "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 698368
    },
    "/model/layers.10/input_layernorm/output_3.out4_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 704512
    },
    "/model/layers.10/input_layernorm/output_0.out4_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 710656
    },
    "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 716800
    },
    "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 729088
    },
    "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 735232
    },
    "/model/layers.10/post_attention_layernorm/output_3.out4_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 741376
    },
    "/model/layers.10/post_attention_layernorm/output_0.out4_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 747520
    },
    "/model/layers.10/mlp/Mul/output_0.out3_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 753664
    },
    "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 770048
    },
    "/model/layers.11/input_layernorm/output_3.out4_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 776192
    },
    "/model/layers.11/input_layernorm/output_0.out4_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 782336
    },
    "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 788480
    },
    "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 800768
    },
    "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 806912
    },
    "/model/layers.11/post_attention_layernorm/output_3.out4_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 813056
    },
    "/model/layers.11/post_attention_layernorm/output_0.out4_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 819200
    },
    "/model/layers.11/mlp/Mul/output_0.out3_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 825344
    },
    "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 841728
    },
    "/model/layers.12/input_layernorm/output_3.out4_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 847872
    },
    "/model/layers.12/input_layernorm/output_0.out4_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 854016
    },
    "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 860160
    },
    "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 872448
    },
    "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 878592
    },
    "/model/layers.12/post_attention_layernorm/output_3.out4_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 884736
    },
    "/model/layers.12/post_attention_layernorm/output_0.out4_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 890880
    },
    "/model/layers.12/mlp/Mul/output_0.out3_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 897024
    },
    "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 913408
    },
    "/model/layers.13/input_layernorm/output_3.out4_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 919552
    },
    "/model/layers.13/input_layernorm/output_0.out4_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 925696
    },
    "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 931840
    },
    "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 944128
    },
    "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 950272
    },
    "/model/layers.13/post_attention_layernorm/output_3.out4_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 956416
    },
    "/model/layers.13/post_attention_layernorm/output_0.out4_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 962560
    },
    "/model/layers.13/mlp/Mul/output_0.out3_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 968704
    },
    "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 985088
    },
    "/model/layers.14/input_layernorm/output_3.out4_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 991232
    },
    "/model/layers.14/input_layernorm/output_0.out4_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 997376
    },
    "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1003520
    },
    "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1015808
    },
    "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1021952
    },
    "/model/layers.14/post_attention_layernorm/output_3.out4_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1028096
    },
    "/model/layers.14/post_attention_layernorm/output_0.out4_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1034240
    },
    "/model/layers.14/mlp/Mul/output_0.out3_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1040384
    },
    "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1056768
    },
    "/model/layers.15/input_layernorm/output_3.out4_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1062912
    },
    "/model/layers.15/input_layernorm/output_0.out4_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1069056
    },
    "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1075200
    },
    "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1087488
    },
    "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1093632
    },
    "/model/layers.15/post_attention_layernorm/output_3.out4_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1099776
    },
    "/model/layers.15/post_attention_layernorm/output_0.out4_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1105920
    },
    "/model/layers.15/mlp/Mul/output_0.out3_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1112064
    },
    "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1128448
    },
    "/model/layers.16/input_layernorm/output_3.out4_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1134592
    },
    "/model/layers.16/input_layernorm/output_0.out4_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1140736
    },
    "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1146880
    },
    "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1159168
    },
    "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1165312
    },
    "/model/layers.16/post_attention_layernorm/output_3.out4_32": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1171456
    },
    "/model/layers.16/post_attention_layernorm/output_0.out4_32": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1177600
    },
    "/model/layers.16/mlp/Mul/output_0.out3_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1183744
    },
    "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1200128
    },
    "/model/layers.17/input_layernorm/output_3.out4_33": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1206272
    },
    "/model/layers.17/input_layernorm/output_0.out4_33": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1212416
    },
    "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1218560
    },
    "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1230848
    },
    "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1236992
    },
    "/model/layers.17/post_attention_layernorm/output_3.out4_34": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1243136
    },
    "/model/layers.17/post_attention_layernorm/output_0.out4_34": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1249280
    },
    "/model/layers.17/mlp/Mul/output_0.out3_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1255424
    },
    "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1271808
    },
    "/model/layers.18/input_layernorm/output_3.out4_35": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1277952
    },
    "/model/layers.18/input_layernorm/output_0.out4_35": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1284096
    },
    "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1290240
    },
    "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1302528
    },
    "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1308672
    },
    "/model/layers.18/post_attention_layernorm/output_3.out4_36": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1314816
    },
    "/model/layers.18/post_attention_layernorm/output_0.out4_36": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1320960
    },
    "/model/layers.18/mlp/Mul/output_0.out3_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1327104
    },
    "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1343488
    },
    "/model/layers.19/input_layernorm/output_3.out4_37": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1349632
    },
    "/model/layers.19/input_layernorm/output_0.out4_37": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1355776
    },
    "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1361920
    },
    "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1374208
    },
    "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1380352
    },
    "/model/layers.19/post_attention_layernorm/output_3.out4_38": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1386496
    },
    "/model/layers.19/post_attention_layernorm/output_0.out4_38": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1392640
    },
    "/model/layers.19/mlp/Mul/output_0.out3_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1398784
    },
    "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1415168
    },
    "/model/layers.20/input_layernorm/output_3.out4_39": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1421312
    },
    "/model/layers.20/input_layernorm/output_0.out4_39": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1427456
    },
    "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1433600
    },
    "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1445888
    },
    "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1452032
    },
    "/model/layers.20/post_attention_layernorm/output_3.out4_40": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1458176
    },
    "/model/layers.20/post_attention_layernorm/output_0.out4_40": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1464320
    },
    "/model/layers.20/mlp/Mul/output_0.out3_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1470464
    },
    "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1486848
    },
    "/model/layers.21/input_layernorm/output_3.out4_41": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1492992
    },
    "/model/layers.21/input_layernorm/output_0.out4_41": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1499136
    },
    "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1505280
    },
    "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1517568
    },
    "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1523712
    },
    "/model/layers.21/post_attention_layernorm/output_3.out4_42": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1529856
    },
    "/model/layers.21/post_attention_layernorm/output_0.out4_42": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1536000
    },
    "/model/layers.21/mlp/Mul/output_0.out3_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1542144
    },
    "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1558528
    },
    "/model/layers.22/input_layernorm/output_3.out4_43": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1564672
    },
    "/model/layers.22/input_layernorm/output_0.out4_43": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1570816
    },
    "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1576960
    },
    "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1589248
    },
    "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1595392
    },
    "/model/layers.22/post_attention_layernorm/output_3.out4_44": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1601536
    },
    "/model/layers.22/post_attention_layernorm/output_0.out4_44": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1607680
    },
    "/model/layers.22/mlp/Mul/output_0.out3_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1613824
    },
    "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1630208
    },
    "/model/layers.23/input_layernorm/output_3.out4_45": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1636352
    },
    "/model/layers.23/input_layernorm/output_0.out4_45": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1642496
    },
    "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1648640
    },
    "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1660928
    },
    "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1667072
    },
    "/model/layers.23/post_attention_layernorm/output_3.out4_46": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1673216
    },
    "/model/layers.23/post_attention_layernorm/output_0.out4_46": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1679360
    },
    "/model/layers.23/mlp/Mul/output_0.out3_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1685504
    },
    "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1701888
    },
    "/model/layers.24/input_layernorm/output_3.out4_47": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1708032
    },
    "/model/layers.24/input_layernorm/output_0.out4_47": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1714176
    },
    "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1720320
    },
    "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1732608
    },
    "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1738752
    },
    "/model/layers.24/post_attention_layernorm/output_3.out4_48": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1744896
    },
    "/model/layers.24/post_attention_layernorm/output_0.out4_48": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1751040
    },
    "/model/layers.24/mlp/Mul/output_0.out3_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1757184
    },
    "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1773568
    },
    "/model/layers.25/input_layernorm/output_3.out4_49": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1779712
    },
    "/model/layers.25/input_layernorm/output_0.out4_49": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1785856
    },
    "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1792000
    },
    "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1804288
    },
    "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1810432
    },
    "/model/layers.25/post_attention_layernorm/output_3.out4_50": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1816576
    },
    "/model/layers.25/post_attention_layernorm/output_0.out4_50": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1822720
    },
    "/model/layers.25/mlp/Mul/output_0.out3_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1828864
    },
    "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1845248
    },
    "/model/layers.26/input_layernorm/output_3.out4_51": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1851392
    },
    "/model/layers.26/input_layernorm/output_0.out4_51": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1857536
    },
    "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1863680
    },
    "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1875968
    },
    "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1882112
    },
    "/model/layers.26/post_attention_layernorm/output_3.out4_52": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1888256
    },
    "/model/layers.26/post_attention_layernorm/output_0.out4_52": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1894400
    },
    "/model/layers.26/mlp/Mul/output_0.out3_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1900544
    },
    "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1916928
    },
    "/model/layers.27/input_layernorm/output_3.out4_53": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1923072
    },
    "/model/layers.27/input_layernorm/output_0.out4_53": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1929216
    },
    "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1935360
    },
    "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1947648
    },
    "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1953792
    },
    "/model/layers.27/post_attention_layernorm/output_3.out4_54": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1959936
    },
    "/model/layers.27/post_attention_layernorm/output_0.out4_54": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1966080
    },
    "/model/layers.27/mlp/Mul/output_0.out3_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 1972224
    },
    "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1988608
    },
    "/model/layers.28/input_layernorm/output_3.out4_55": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1994752
    },
    "/model/layers.28/input_layernorm/output_0.out4_55": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2000896
    },
    "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2007040
    },
    "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2019328
    },
    "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2025472
    },
    "/model/layers.28/post_attention_layernorm/output_3.out4_56": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2031616
    },
    "/model/layers.28/post_attention_layernorm/output_0.out4_56": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2037760
    },
    "/model/layers.28/mlp/Mul/output_0.out3_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 2043904
    },
    "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2060288
    },
    "/model/layers.29/input_layernorm/output_3.out4_57": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2066432
    },
    "/model/layers.29/input_layernorm/output_0.out4_57": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2072576
    },
    "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2078720
    },
    "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2091008
    },
    "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2097152
    },
    "/model/layers.29/post_attention_layernorm/output_3.out4_58": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2103296
    },
    "/model/layers.29/post_attention_layernorm/output_0.out4_58": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2109440
    },
    "/model/layers.29/mlp/Mul/output_0.out3_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 2115584
    },
    "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2131968
    },
    "/model/layers.30/input_layernorm/output_3.out4_59": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2138112
    },
    "/model/layers.30/input_layernorm/output_0.out4_59": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2144256
    },
    "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2150400
    },
    "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2162688
    },
    "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2168832
    },
    "/model/layers.30/post_attention_layernorm/output_3.out4_60": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2174976
    },
    "/model/layers.30/post_attention_layernorm/output_0.out4_60": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2181120
    },
    "/model/layers.30/mlp/Mul/output_0.out3_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 2187264
    },
    "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2203648
    },
    "/model/layers.31/input_layernorm/output_3.out4_61": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2209792
    },
    "/model/layers.31/input_layernorm/output_0.out4_61": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2215936
    },
    "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        6144
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2222080
    },
    "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2234368
    },
    "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2240512
    },
    "/model/layers.31/post_attention_layernorm/output_3.out4_62": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2246656
    },
    "/model/layers.31/post_attention_layernorm/output_0.out4_62": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2252800
    },
    "/model/layers.31/mlp/Mul/output_0.out3_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8192
      ],
      "size_in_bytes": 16384,
      "op_tensor_size": 16384,
      "offset": 2258944
    },
    "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2275328
    },
    "/model/layers.32/final_norm_layernorm/output_0.out4_63": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2281472
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 0,
      "file_name": ".cache\\MatMulNBits_2_0_0.const",
      "file_size": 18874368
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 18874368,
      "file_name": ".cache\\MatMulNBits_2_0_1.const",
      "file_size": 24576
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 18898944,
      "file_name": ".cache\\MatMulNBits_2_0_2.const",
      "file_size": 589824
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 19488768,
      "file_name": ".cache\\MatMulNBits_2_0_3.const",
      "file_size": 147456
    },
    "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 19636224,
      "file_name": ".cache\\MatMulNBits_2_0_4.const",
      "file_size": 9437184
    },
    "model.layers.0.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 29073408,
      "file_name": ".cache\\MatMulNBits_2_0_5.const",
      "file_size": 12288
    },
    "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 29085696,
      "file_name": ".cache\\MatMulNBits_2_0_6.const",
      "file_size": 294912
    },
    "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 29380608,
      "file_name": ".cache\\MatMulNBits_2_0_7.const",
      "file_size": 73728
    },
    "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 29454336,
      "file_name": ".cache\\MatMulNBits_2_0_8.const",
      "file_size": 9437184
    },
    "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 38891520,
      "file_name": ".cache\\MatMulNBits_2_0_9.const",
      "file_size": 12288
    },
    "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 38903808,
      "file_name": ".cache\\MatMulNBits_2_0_10.const",
      "file_size": 294912
    },
    "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 39198720,
      "file_name": ".cache\\MatMulNBits_2_0_11.const",
      "file_size": 73728
    },
    "model.layers.0.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 39272448,
      "file_name": ".cache\\MatMulNBits_2_0_12.const",
      "file_size": 6144
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 39278592,
      "file_name": ".cache\\MatMulNBits_2_0_13.const",
      "file_size": 12582912
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 51861504,
      "file_name": ".cache\\MatMulNBits_2_0_14.const",
      "file_size": 786432
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 52647936,
      "file_name": ".cache\\MatMulNBits_2_0_15.const",
      "file_size": 98304
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 52746240,
      "file_name": ".cache\\MatMulNBits_2_0_16.const",
      "file_size": 32768
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 52779008,
      "file_name": ".cache\\MatMulNBits_2_0_17.const",
      "file_size": 12582912
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 65361920,
      "file_name": ".cache\\MatMulNBits_2_0_18.const",
      "file_size": 786432
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 66148352,
      "file_name": ".cache\\MatMulNBits_2_0_19.const",
      "file_size": 98304
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 66246656,
      "file_name": ".cache\\MatMulNBits_2_0_20.const",
      "file_size": 32768
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 66279424,
      "file_name": ".cache\\MatMulNBits_2_0_21.const",
      "file_size": 25165824
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 91445248,
      "file_name": ".cache\\MatMulNBits_2_0_22.const",
      "file_size": 12288
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 91457536,
      "file_name": ".cache\\MatMulNBits_2_0_23.const",
      "file_size": 786432
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 92243968,
      "file_name": ".cache\\MatMulNBits_2_0_24.const",
      "file_size": 196608
    },
    "model.layers.1.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 92440576,
      "file_name": ".cache\\MatMulNBits_2_0_25.const",
      "file_size": 6144
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 92446720,
      "file_name": ".cache\\MatMulNBits_2_0_26.const",
      "file_size": 18874368
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 111321088,
      "file_name": ".cache\\MatMulNBits_2_0_27.const",
      "file_size": 24576
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 111345664,
      "file_name": ".cache\\MatMulNBits_2_0_28.const",
      "file_size": 589824
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 111935488,
      "file_name": ".cache\\MatMulNBits_2_0_29.const",
      "file_size": 147456
    },
    "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 112082944,
      "file_name": ".cache\\MatMulNBits_2_0_30.const",
      "file_size": 9437184
    },
    "model.layers.1.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 121520128,
      "file_name": ".cache\\MatMulNBits_2_0_31.const",
      "file_size": 12288
    },
    "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 121532416,
      "file_name": ".cache\\MatMulNBits_2_0_32.const",
      "file_size": 294912
    },
    "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 121827328,
      "file_name": ".cache\\MatMulNBits_2_0_33.const",
      "file_size": 73728
    },
    "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 121901056,
      "file_name": ".cache\\MatMulNBits_2_0_34.const",
      "file_size": 9437184
    },
    "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 131338240,
      "file_name": ".cache\\MatMulNBits_2_0_35.const",
      "file_size": 12288
    },
    "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 131350528,
      "file_name": ".cache\\MatMulNBits_2_0_36.const",
      "file_size": 294912
    },
    "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 131645440,
      "file_name": ".cache\\MatMulNBits_2_0_37.const",
      "file_size": 73728
    },
    "model.layers.1.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 131719168,
      "file_name": ".cache\\MatMulNBits_2_0_38.const",
      "file_size": 6144
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 131725312,
      "file_name": ".cache\\MatMulNBits_2_0_39.const",
      "file_size": 12582912
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 144308224,
      "file_name": ".cache\\MatMulNBits_2_0_40.const",
      "file_size": 786432
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 145094656,
      "file_name": ".cache\\MatMulNBits_2_0_41.const",
      "file_size": 98304
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 145192960,
      "file_name": ".cache\\MatMulNBits_2_0_42.const",
      "file_size": 32768
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 145225728,
      "file_name": ".cache\\MatMulNBits_2_0_43.const",
      "file_size": 12582912
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 157808640,
      "file_name": ".cache\\MatMulNBits_2_0_44.const",
      "file_size": 786432
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 158595072,
      "file_name": ".cache\\MatMulNBits_2_0_45.const",
      "file_size": 98304
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 158693376,
      "file_name": ".cache\\MatMulNBits_2_0_46.const",
      "file_size": 32768
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 158726144,
      "file_name": ".cache\\MatMulNBits_2_0_47.const",
      "file_size": 25165824
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 183891968,
      "file_name": ".cache\\MatMulNBits_2_0_48.const",
      "file_size": 12288
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 183904256,
      "file_name": ".cache\\MatMulNBits_2_0_49.const",
      "file_size": 786432
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 184690688,
      "file_name": ".cache\\MatMulNBits_2_0_50.const",
      "file_size": 196608
    },
    "model.layers.2.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 184887296,
      "file_name": ".cache\\MatMulNBits_2_0_51.const",
      "file_size": 6144
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 184893440,
      "file_name": ".cache\\MatMulNBits_2_0_52.const",
      "file_size": 18874368
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 203767808,
      "file_name": ".cache\\MatMulNBits_2_0_53.const",
      "file_size": 24576
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 203792384,
      "file_name": ".cache\\MatMulNBits_2_0_54.const",
      "file_size": 589824
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 204382208,
      "file_name": ".cache\\MatMulNBits_2_0_55.const",
      "file_size": 147456
    },
    "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 204529664,
      "file_name": ".cache\\MatMulNBits_2_0_56.const",
      "file_size": 9437184
    },
    "model.layers.2.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 213966848,
      "file_name": ".cache\\MatMulNBits_2_0_57.const",
      "file_size": 12288
    },
    "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 213979136,
      "file_name": ".cache\\MatMulNBits_2_0_58.const",
      "file_size": 294912
    },
    "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 214274048,
      "file_name": ".cache\\MatMulNBits_2_0_59.const",
      "file_size": 73728
    },
    "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 214347776,
      "file_name": ".cache\\MatMulNBits_2_0_60.const",
      "file_size": 9437184
    },
    "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 223784960,
      "file_name": ".cache\\MatMulNBits_2_0_61.const",
      "file_size": 12288
    },
    "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 223797248,
      "file_name": ".cache\\MatMulNBits_2_0_62.const",
      "file_size": 294912
    },
    "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 224092160,
      "file_name": ".cache\\MatMulNBits_2_0_63.const",
      "file_size": 73728
    },
    "model.layers.2.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 224165888,
      "file_name": ".cache\\MatMulNBits_2_0_64.const",
      "file_size": 6144
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 224172032,
      "file_name": ".cache\\MatMulNBits_2_0_65.const",
      "file_size": 12582912
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 236754944,
      "file_name": ".cache\\MatMulNBits_2_0_66.const",
      "file_size": 786432
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 237541376,
      "file_name": ".cache\\MatMulNBits_2_0_67.const",
      "file_size": 98304
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 237639680,
      "file_name": ".cache\\MatMulNBits_2_0_68.const",
      "file_size": 32768
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 237672448,
      "file_name": ".cache\\MatMulNBits_2_0_69.const",
      "file_size": 12582912
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 250255360,
      "file_name": ".cache\\MatMulNBits_2_0_70.const",
      "file_size": 786432
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 251041792,
      "file_name": ".cache\\MatMulNBits_2_0_71.const",
      "file_size": 98304
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 251140096,
      "file_name": ".cache\\MatMulNBits_2_0_72.const",
      "file_size": 32768
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 251172864,
      "file_name": ".cache\\MatMulNBits_2_0_73.const",
      "file_size": 25165824
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 276338688,
      "file_name": ".cache\\MatMulNBits_2_0_74.const",
      "file_size": 12288
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 276350976,
      "file_name": ".cache\\MatMulNBits_2_0_75.const",
      "file_size": 786432
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 277137408,
      "file_name": ".cache\\MatMulNBits_2_0_76.const",
      "file_size": 196608
    },
    "model.layers.3.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 277334016,
      "file_name": ".cache\\MatMulNBits_2_0_77.const",
      "file_size": 6144
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 277340160,
      "file_name": ".cache\\MatMulNBits_2_0_78.const",
      "file_size": 18874368
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 296214528,
      "file_name": ".cache\\MatMulNBits_2_0_79.const",
      "file_size": 24576
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 296239104,
      "file_name": ".cache\\MatMulNBits_2_0_80.const",
      "file_size": 589824
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 296828928,
      "file_name": ".cache\\MatMulNBits_2_0_81.const",
      "file_size": 147456
    },
    "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 296976384,
      "file_name": ".cache\\MatMulNBits_2_0_82.const",
      "file_size": 9437184
    },
    "model.layers.3.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 306413568,
      "file_name": ".cache\\MatMulNBits_2_0_83.const",
      "file_size": 12288
    },
    "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 306425856,
      "file_name": ".cache\\MatMulNBits_2_0_84.const",
      "file_size": 294912
    },
    "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 306720768,
      "file_name": ".cache\\MatMulNBits_2_0_85.const",
      "file_size": 73728
    },
    "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 306794496,
      "file_name": ".cache\\MatMulNBits_2_0_86.const",
      "file_size": 9437184
    },
    "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 316231680,
      "file_name": ".cache\\MatMulNBits_2_0_87.const",
      "file_size": 12288
    },
    "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 316243968,
      "file_name": ".cache\\MatMulNBits_2_0_88.const",
      "file_size": 294912
    },
    "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 316538880,
      "file_name": ".cache\\MatMulNBits_2_0_89.const",
      "file_size": 73728
    },
    "model.layers.3.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 316612608,
      "file_name": ".cache\\MatMulNBits_2_0_90.const",
      "file_size": 6144
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 316618752,
      "file_name": ".cache\\MatMulNBits_2_0_91.const",
      "file_size": 12582912
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 329201664,
      "file_name": ".cache\\MatMulNBits_2_0_92.const",
      "file_size": 786432
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 329988096,
      "file_name": ".cache\\MatMulNBits_2_0_93.const",
      "file_size": 98304
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 330086400,
      "file_name": ".cache\\MatMulNBits_2_0_94.const",
      "file_size": 32768
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 330119168,
      "file_name": ".cache\\MatMulNBits_2_0_95.const",
      "file_size": 12582912
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 342702080,
      "file_name": ".cache\\MatMulNBits_2_0_96.const",
      "file_size": 786432
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 343488512,
      "file_name": ".cache\\MatMulNBits_2_0_97.const",
      "file_size": 98304
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 343586816,
      "file_name": ".cache\\MatMulNBits_2_0_98.const",
      "file_size": 32768
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 343619584,
      "file_name": ".cache\\MatMulNBits_2_0_99.const",
      "file_size": 25165824
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 368785408,
      "file_name": ".cache\\MatMulNBits_2_0_100.const",
      "file_size": 12288
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 368797696,
      "file_name": ".cache\\MatMulNBits_2_0_101.const",
      "file_size": 786432
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 369584128,
      "file_name": ".cache\\MatMulNBits_2_0_102.const",
      "file_size": 196608
    },
    "model.layers.4.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 369780736,
      "file_name": ".cache\\MatMulNBits_2_0_103.const",
      "file_size": 6144
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 369786880,
      "file_name": ".cache\\MatMulNBits_2_0_104.const",
      "file_size": 18874368
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 388661248,
      "file_name": ".cache\\MatMulNBits_2_0_105.const",
      "file_size": 24576
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 388685824,
      "file_name": ".cache\\MatMulNBits_2_0_106.const",
      "file_size": 589824
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 389275648,
      "file_name": ".cache\\MatMulNBits_2_0_107.const",
      "file_size": 147456
    },
    "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 389423104,
      "file_name": ".cache\\MatMulNBits_2_0_108.const",
      "file_size": 9437184
    },
    "model.layers.4.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 398860288,
      "file_name": ".cache\\MatMulNBits_2_0_109.const",
      "file_size": 12288
    },
    "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 398872576,
      "file_name": ".cache\\MatMulNBits_2_0_110.const",
      "file_size": 294912
    },
    "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 399167488,
      "file_name": ".cache\\MatMulNBits_2_0_111.const",
      "file_size": 73728
    },
    "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 399241216,
      "file_name": ".cache\\MatMulNBits_2_0_112.const",
      "file_size": 9437184
    },
    "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 408678400,
      "file_name": ".cache\\MatMulNBits_2_0_113.const",
      "file_size": 12288
    },
    "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 408690688,
      "file_name": ".cache\\MatMulNBits_2_0_114.const",
      "file_size": 294912
    },
    "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 408985600,
      "file_name": ".cache\\MatMulNBits_2_0_115.const",
      "file_size": 73728
    },
    "model.layers.4.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 409059328,
      "file_name": ".cache\\MatMulNBits_2_0_116.const",
      "file_size": 6144
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 409065472,
      "file_name": ".cache\\MatMulNBits_2_0_117.const",
      "file_size": 12582912
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 421648384,
      "file_name": ".cache\\MatMulNBits_2_0_118.const",
      "file_size": 786432
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 422434816,
      "file_name": ".cache\\MatMulNBits_2_0_119.const",
      "file_size": 98304
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 422533120,
      "file_name": ".cache\\MatMulNBits_2_0_120.const",
      "file_size": 32768
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 422565888,
      "file_name": ".cache\\MatMulNBits_2_0_121.const",
      "file_size": 12582912
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 435148800,
      "file_name": ".cache\\MatMulNBits_2_0_122.const",
      "file_size": 786432
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 435935232,
      "file_name": ".cache\\MatMulNBits_2_0_123.const",
      "file_size": 98304
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 436033536,
      "file_name": ".cache\\MatMulNBits_2_0_124.const",
      "file_size": 32768
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 436066304,
      "file_name": ".cache\\MatMulNBits_2_0_125.const",
      "file_size": 25165824
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 461232128,
      "file_name": ".cache\\MatMulNBits_2_0_126.const",
      "file_size": 12288
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 461244416,
      "file_name": ".cache\\MatMulNBits_2_0_127.const",
      "file_size": 786432
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 462030848,
      "file_name": ".cache\\MatMulNBits_2_0_128.const",
      "file_size": 196608
    },
    "model.layers.5.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 462227456,
      "file_name": ".cache\\MatMulNBits_2_0_129.const",
      "file_size": 6144
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 462233600,
      "file_name": ".cache\\MatMulNBits_2_0_130.const",
      "file_size": 18874368
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 481107968,
      "file_name": ".cache\\MatMulNBits_2_0_131.const",
      "file_size": 24576
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 481132544,
      "file_name": ".cache\\MatMulNBits_2_0_132.const",
      "file_size": 589824
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 481722368,
      "file_name": ".cache\\MatMulNBits_2_0_133.const",
      "file_size": 147456
    },
    "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 481869824,
      "file_name": ".cache\\MatMulNBits_2_0_134.const",
      "file_size": 9437184
    },
    "model.layers.5.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 491307008,
      "file_name": ".cache\\MatMulNBits_2_0_135.const",
      "file_size": 12288
    },
    "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 491319296,
      "file_name": ".cache\\MatMulNBits_2_0_136.const",
      "file_size": 294912
    },
    "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 491614208,
      "file_name": ".cache\\MatMulNBits_2_0_137.const",
      "file_size": 73728
    },
    "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 491687936,
      "file_name": ".cache\\MatMulNBits_2_0_138.const",
      "file_size": 9437184
    },
    "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 501125120,
      "file_name": ".cache\\MatMulNBits_2_0_139.const",
      "file_size": 12288
    },
    "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 501137408,
      "file_name": ".cache\\MatMulNBits_2_0_140.const",
      "file_size": 294912
    },
    "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 501432320,
      "file_name": ".cache\\MatMulNBits_2_0_141.const",
      "file_size": 73728
    },
    "model.layers.5.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 501506048,
      "file_name": ".cache\\MatMulNBits_2_0_142.const",
      "file_size": 6144
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 501512192,
      "file_name": ".cache\\MatMulNBits_2_0_143.const",
      "file_size": 12582912
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 514095104,
      "file_name": ".cache\\MatMulNBits_2_0_144.const",
      "file_size": 786432
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 514881536,
      "file_name": ".cache\\MatMulNBits_2_0_145.const",
      "file_size": 98304
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 514979840,
      "file_name": ".cache\\MatMulNBits_2_0_146.const",
      "file_size": 32768
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 515012608,
      "file_name": ".cache\\MatMulNBits_2_0_147.const",
      "file_size": 12582912
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 527595520,
      "file_name": ".cache\\MatMulNBits_2_0_148.const",
      "file_size": 786432
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 528381952,
      "file_name": ".cache\\MatMulNBits_2_0_149.const",
      "file_size": 98304
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 528480256,
      "file_name": ".cache\\MatMulNBits_2_0_150.const",
      "file_size": 32768
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 528513024,
      "file_name": ".cache\\MatMulNBits_2_0_151.const",
      "file_size": 25165824
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 553678848,
      "file_name": ".cache\\MatMulNBits_2_0_152.const",
      "file_size": 12288
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 553691136,
      "file_name": ".cache\\MatMulNBits_2_0_153.const",
      "file_size": 786432
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 554477568,
      "file_name": ".cache\\MatMulNBits_2_0_154.const",
      "file_size": 196608
    },
    "model.layers.6.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 554674176,
      "file_name": ".cache\\MatMulNBits_2_0_155.const",
      "file_size": 6144
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 554680320,
      "file_name": ".cache\\MatMulNBits_2_0_156.const",
      "file_size": 18874368
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 573554688,
      "file_name": ".cache\\MatMulNBits_2_0_157.const",
      "file_size": 24576
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 573579264,
      "file_name": ".cache\\MatMulNBits_2_0_158.const",
      "file_size": 589824
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 574169088,
      "file_name": ".cache\\MatMulNBits_2_0_159.const",
      "file_size": 147456
    },
    "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 574316544,
      "file_name": ".cache\\MatMulNBits_2_0_160.const",
      "file_size": 9437184
    },
    "model.layers.6.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 583753728,
      "file_name": ".cache\\MatMulNBits_2_0_161.const",
      "file_size": 12288
    },
    "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 583766016,
      "file_name": ".cache\\MatMulNBits_2_0_162.const",
      "file_size": 294912
    },
    "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 584060928,
      "file_name": ".cache\\MatMulNBits_2_0_163.const",
      "file_size": 73728
    },
    "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 584134656,
      "file_name": ".cache\\MatMulNBits_2_0_164.const",
      "file_size": 9437184
    },
    "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 593571840,
      "file_name": ".cache\\MatMulNBits_2_0_165.const",
      "file_size": 12288
    },
    "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 593584128,
      "file_name": ".cache\\MatMulNBits_2_0_166.const",
      "file_size": 294912
    },
    "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 593879040,
      "file_name": ".cache\\MatMulNBits_2_0_167.const",
      "file_size": 73728
    },
    "model.layers.6.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 593952768,
      "file_name": ".cache\\MatMulNBits_2_0_168.const",
      "file_size": 6144
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 593958912,
      "file_name": ".cache\\MatMulNBits_2_0_169.const",
      "file_size": 12582912
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 606541824,
      "file_name": ".cache\\MatMulNBits_2_0_170.const",
      "file_size": 786432
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 607328256,
      "file_name": ".cache\\MatMulNBits_2_0_171.const",
      "file_size": 98304
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 607426560,
      "file_name": ".cache\\MatMulNBits_2_0_172.const",
      "file_size": 32768
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 607459328,
      "file_name": ".cache\\MatMulNBits_2_0_173.const",
      "file_size": 12582912
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 620042240,
      "file_name": ".cache\\MatMulNBits_2_0_174.const",
      "file_size": 786432
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 620828672,
      "file_name": ".cache\\MatMulNBits_2_0_175.const",
      "file_size": 98304
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 620926976,
      "file_name": ".cache\\MatMulNBits_2_0_176.const",
      "file_size": 32768
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 620959744,
      "file_name": ".cache\\MatMulNBits_2_0_177.const",
      "file_size": 25165824
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 646125568,
      "file_name": ".cache\\MatMulNBits_2_0_178.const",
      "file_size": 12288
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 646137856,
      "file_name": ".cache\\MatMulNBits_2_0_179.const",
      "file_size": 786432
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 646924288,
      "file_name": ".cache\\MatMulNBits_2_0_180.const",
      "file_size": 196608
    },
    "model.layers.7.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 647120896,
      "file_name": ".cache\\MatMulNBits_2_0_181.const",
      "file_size": 6144
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 647127040,
      "file_name": ".cache\\MatMulNBits_2_0_182.const",
      "file_size": 18874368
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 666001408,
      "file_name": ".cache\\MatMulNBits_2_0_183.const",
      "file_size": 24576
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 666025984,
      "file_name": ".cache\\MatMulNBits_2_0_184.const",
      "file_size": 589824
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 666615808,
      "file_name": ".cache\\MatMulNBits_2_0_185.const",
      "file_size": 147456
    },
    "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 666763264,
      "file_name": ".cache\\MatMulNBits_2_0_186.const",
      "file_size": 9437184
    },
    "model.layers.7.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 676200448,
      "file_name": ".cache\\MatMulNBits_2_0_187.const",
      "file_size": 12288
    },
    "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 676212736,
      "file_name": ".cache\\MatMulNBits_2_0_188.const",
      "file_size": 294912
    },
    "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 676507648,
      "file_name": ".cache\\MatMulNBits_2_0_189.const",
      "file_size": 73728
    },
    "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 676581376,
      "file_name": ".cache\\MatMulNBits_2_0_190.const",
      "file_size": 9437184
    },
    "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 686018560,
      "file_name": ".cache\\MatMulNBits_2_0_191.const",
      "file_size": 12288
    },
    "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 686030848,
      "file_name": ".cache\\MatMulNBits_2_0_192.const",
      "file_size": 294912
    },
    "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 686325760,
      "file_name": ".cache\\MatMulNBits_2_0_193.const",
      "file_size": 73728
    },
    "model.layers.7.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 686399488,
      "file_name": ".cache\\MatMulNBits_2_0_194.const",
      "file_size": 6144
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 686405632,
      "file_name": ".cache\\MatMulNBits_2_0_195.const",
      "file_size": 12582912
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 698988544,
      "file_name": ".cache\\MatMulNBits_2_0_196.const",
      "file_size": 786432
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 699774976,
      "file_name": ".cache\\MatMulNBits_2_0_197.const",
      "file_size": 98304
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 699873280,
      "file_name": ".cache\\MatMulNBits_2_0_198.const",
      "file_size": 32768
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 699906048,
      "file_name": ".cache\\MatMulNBits_2_0_199.const",
      "file_size": 12582912
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 712488960,
      "file_name": ".cache\\MatMulNBits_2_0_200.const",
      "file_size": 786432
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 713275392,
      "file_name": ".cache\\MatMulNBits_2_0_201.const",
      "file_size": 98304
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 713373696,
      "file_name": ".cache\\MatMulNBits_2_0_202.const",
      "file_size": 32768
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 713406464,
      "file_name": ".cache\\MatMulNBits_2_0_203.const",
      "file_size": 25165824
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 738572288,
      "file_name": ".cache\\MatMulNBits_2_0_204.const",
      "file_size": 12288
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 738584576,
      "file_name": ".cache\\MatMulNBits_2_0_205.const",
      "file_size": 786432
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 739371008,
      "file_name": ".cache\\MatMulNBits_2_0_206.const",
      "file_size": 196608
    },
    "model.layers.8.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 739567616,
      "file_name": ".cache\\MatMulNBits_2_0_207.const",
      "file_size": 6144
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 739573760,
      "file_name": ".cache\\MatMulNBits_2_0_208.const",
      "file_size": 18874368
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 758448128,
      "file_name": ".cache\\MatMulNBits_2_0_209.const",
      "file_size": 24576
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 758472704,
      "file_name": ".cache\\MatMulNBits_2_0_210.const",
      "file_size": 589824
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 759062528,
      "file_name": ".cache\\MatMulNBits_2_0_211.const",
      "file_size": 147456
    },
    "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 759209984,
      "file_name": ".cache\\MatMulNBits_2_0_212.const",
      "file_size": 9437184
    },
    "model.layers.8.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 768647168,
      "file_name": ".cache\\MatMulNBits_2_0_213.const",
      "file_size": 12288
    },
    "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 768659456,
      "file_name": ".cache\\MatMulNBits_2_0_214.const",
      "file_size": 294912
    },
    "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 768954368,
      "file_name": ".cache\\MatMulNBits_2_0_215.const",
      "file_size": 73728
    },
    "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 769028096,
      "file_name": ".cache\\MatMulNBits_2_0_216.const",
      "file_size": 9437184
    },
    "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 778465280,
      "file_name": ".cache\\MatMulNBits_2_0_217.const",
      "file_size": 12288
    },
    "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 778477568,
      "file_name": ".cache\\MatMulNBits_2_0_218.const",
      "file_size": 294912
    },
    "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 778772480,
      "file_name": ".cache\\MatMulNBits_2_0_219.const",
      "file_size": 73728
    },
    "model.layers.8.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 778846208,
      "file_name": ".cache\\MatMulNBits_2_0_220.const",
      "file_size": 6144
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 778852352,
      "file_name": ".cache\\MatMulNBits_2_0_221.const",
      "file_size": 12582912
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 791435264,
      "file_name": ".cache\\MatMulNBits_2_0_222.const",
      "file_size": 786432
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 792221696,
      "file_name": ".cache\\MatMulNBits_2_0_223.const",
      "file_size": 98304
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 792320000,
      "file_name": ".cache\\MatMulNBits_2_0_224.const",
      "file_size": 32768
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 792352768,
      "file_name": ".cache\\MatMulNBits_2_0_225.const",
      "file_size": 12582912
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 804935680,
      "file_name": ".cache\\MatMulNBits_2_0_226.const",
      "file_size": 786432
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 805722112,
      "file_name": ".cache\\MatMulNBits_2_0_227.const",
      "file_size": 98304
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 805820416,
      "file_name": ".cache\\MatMulNBits_2_0_228.const",
      "file_size": 32768
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 805853184,
      "file_name": ".cache\\MatMulNBits_2_0_229.const",
      "file_size": 25165824
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 831019008,
      "file_name": ".cache\\MatMulNBits_2_0_230.const",
      "file_size": 12288
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 831031296,
      "file_name": ".cache\\MatMulNBits_2_0_231.const",
      "file_size": 786432
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 831817728,
      "file_name": ".cache\\MatMulNBits_2_0_232.const",
      "file_size": 196608
    },
    "model.layers.9.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 832014336,
      "file_name": ".cache\\MatMulNBits_2_0_233.const",
      "file_size": 6144
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 832020480,
      "file_name": ".cache\\MatMulNBits_2_0_234.const",
      "file_size": 18874368
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 850894848,
      "file_name": ".cache\\MatMulNBits_2_0_235.const",
      "file_size": 24576
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 850919424,
      "file_name": ".cache\\MatMulNBits_2_0_236.const",
      "file_size": 589824
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 851509248,
      "file_name": ".cache\\MatMulNBits_2_0_237.const",
      "file_size": 147456
    },
    "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 851656704,
      "file_name": ".cache\\MatMulNBits_2_0_238.const",
      "file_size": 9437184
    },
    "model.layers.9.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 861093888,
      "file_name": ".cache\\MatMulNBits_2_0_239.const",
      "file_size": 12288
    },
    "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 861106176,
      "file_name": ".cache\\MatMulNBits_2_0_240.const",
      "file_size": 294912
    },
    "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 861401088,
      "file_name": ".cache\\MatMulNBits_2_0_241.const",
      "file_size": 73728
    },
    "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 861474816,
      "file_name": ".cache\\MatMulNBits_2_0_242.const",
      "file_size": 9437184
    },
    "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 870912000,
      "file_name": ".cache\\MatMulNBits_2_0_243.const",
      "file_size": 12288
    },
    "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 870924288,
      "file_name": ".cache\\MatMulNBits_2_0_244.const",
      "file_size": 294912
    },
    "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 871219200,
      "file_name": ".cache\\MatMulNBits_2_0_245.const",
      "file_size": 73728
    },
    "model.layers.9.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 871292928,
      "file_name": ".cache\\MatMulNBits_2_0_246.const",
      "file_size": 6144
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 871299072,
      "file_name": ".cache\\MatMulNBits_2_0_247.const",
      "file_size": 12582912
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 883881984,
      "file_name": ".cache\\MatMulNBits_2_0_248.const",
      "file_size": 786432
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 884668416,
      "file_name": ".cache\\MatMulNBits_2_0_249.const",
      "file_size": 98304
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 884766720,
      "file_name": ".cache\\MatMulNBits_2_0_250.const",
      "file_size": 32768
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 884799488,
      "file_name": ".cache\\MatMulNBits_2_0_251.const",
      "file_size": 12582912
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 897382400,
      "file_name": ".cache\\MatMulNBits_2_0_252.const",
      "file_size": 786432
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 898168832,
      "file_name": ".cache\\MatMulNBits_2_0_253.const",
      "file_size": 98304
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 898267136,
      "file_name": ".cache\\MatMulNBits_2_0_254.const",
      "file_size": 32768
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 898299904,
      "file_name": ".cache\\MatMulNBits_2_0_255.const",
      "file_size": 25165824
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 923465728,
      "file_name": ".cache\\MatMulNBits_2_0_256.const",
      "file_size": 12288
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 923478016,
      "file_name": ".cache\\MatMulNBits_2_0_257.const",
      "file_size": 786432
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 924264448,
      "file_name": ".cache\\MatMulNBits_2_0_258.const",
      "file_size": 196608
    },
    "model.layers.10.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 924461056,
      "file_name": ".cache\\MatMulNBits_2_0_259.const",
      "file_size": 6144
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 924467200,
      "file_name": ".cache\\MatMulNBits_2_0_260.const",
      "file_size": 18874368
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 943341568,
      "file_name": ".cache\\MatMulNBits_2_0_261.const",
      "file_size": 24576
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 943366144,
      "file_name": ".cache\\MatMulNBits_2_0_262.const",
      "file_size": 589824
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 943955968,
      "file_name": ".cache\\MatMulNBits_2_0_263.const",
      "file_size": 147456
    },
    "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 944103424,
      "file_name": ".cache\\MatMulNBits_2_0_264.const",
      "file_size": 9437184
    },
    "model.layers.10.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 953540608,
      "file_name": ".cache\\MatMulNBits_2_0_265.const",
      "file_size": 12288
    },
    "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 953552896,
      "file_name": ".cache\\MatMulNBits_2_0_266.const",
      "file_size": 294912
    },
    "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 953847808,
      "file_name": ".cache\\MatMulNBits_2_0_267.const",
      "file_size": 73728
    },
    "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 953921536,
      "file_name": ".cache\\MatMulNBits_2_0_268.const",
      "file_size": 9437184
    },
    "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 963358720,
      "file_name": ".cache\\MatMulNBits_2_0_269.const",
      "file_size": 12288
    },
    "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 963371008,
      "file_name": ".cache\\MatMulNBits_2_0_270.const",
      "file_size": 294912
    },
    "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 963665920,
      "file_name": ".cache\\MatMulNBits_2_0_271.const",
      "file_size": 73728
    },
    "model.layers.10.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 963739648,
      "file_name": ".cache\\MatMulNBits_2_0_272.const",
      "file_size": 6144
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 963745792,
      "file_name": ".cache\\MatMulNBits_2_0_273.const",
      "file_size": 12582912
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 976328704,
      "file_name": ".cache\\MatMulNBits_2_0_274.const",
      "file_size": 786432
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 977115136,
      "file_name": ".cache\\MatMulNBits_2_0_275.const",
      "file_size": 98304
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 977213440,
      "file_name": ".cache\\MatMulNBits_2_0_276.const",
      "file_size": 32768
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 977246208,
      "file_name": ".cache\\MatMulNBits_2_0_277.const",
      "file_size": 12582912
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 989829120,
      "file_name": ".cache\\MatMulNBits_2_0_278.const",
      "file_size": 786432
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 990615552,
      "file_name": ".cache\\MatMulNBits_2_0_279.const",
      "file_size": 98304
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 990713856,
      "file_name": ".cache\\MatMulNBits_2_0_280.const",
      "file_size": 32768
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 990746624,
      "file_name": ".cache\\MatMulNBits_2_0_281.const",
      "file_size": 25165824
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1015912448,
      "file_name": ".cache\\MatMulNBits_2_0_282.const",
      "file_size": 12288
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1015924736,
      "file_name": ".cache\\MatMulNBits_2_0_283.const",
      "file_size": 786432
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1016711168,
      "file_name": ".cache\\MatMulNBits_2_0_284.const",
      "file_size": 196608
    },
    "model.layers.11.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1016907776,
      "file_name": ".cache\\MatMulNBits_2_0_285.const",
      "file_size": 6144
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1016913920,
      "file_name": ".cache\\MatMulNBits_2_0_286.const",
      "file_size": 18874368
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1035788288,
      "file_name": ".cache\\MatMulNBits_2_0_287.const",
      "file_size": 24576
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1035812864,
      "file_name": ".cache\\MatMulNBits_2_0_288.const",
      "file_size": 589824
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1036402688,
      "file_name": ".cache\\MatMulNBits_2_0_289.const",
      "file_size": 147456
    },
    "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1036550144,
      "file_name": ".cache\\MatMulNBits_2_0_290.const",
      "file_size": 9437184
    },
    "model.layers.11.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1045987328,
      "file_name": ".cache\\MatMulNBits_2_0_291.const",
      "file_size": 12288
    },
    "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1045999616,
      "file_name": ".cache\\MatMulNBits_2_0_292.const",
      "file_size": 294912
    },
    "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1046294528,
      "file_name": ".cache\\MatMulNBits_2_0_293.const",
      "file_size": 73728
    },
    "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1046368256,
      "file_name": ".cache\\MatMulNBits_2_0_294.const",
      "file_size": 9437184
    },
    "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1055805440,
      "file_name": ".cache\\MatMulNBits_2_0_295.const",
      "file_size": 12288
    },
    "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1055817728,
      "file_name": ".cache\\MatMulNBits_2_0_296.const",
      "file_size": 294912
    },
    "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1056112640,
      "file_name": ".cache\\MatMulNBits_2_0_297.const",
      "file_size": 73728
    },
    "model.layers.11.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1056186368,
      "file_name": ".cache\\MatMulNBits_2_0_298.const",
      "file_size": 6144
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1056192512,
      "file_name": ".cache\\MatMulNBits_2_0_299.const",
      "file_size": 12582912
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1068775424,
      "file_name": ".cache\\MatMulNBits_2_0_300.const",
      "file_size": 786432
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1069561856,
      "file_name": ".cache\\MatMulNBits_2_0_301.const",
      "file_size": 98304
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1069660160,
      "file_name": ".cache\\MatMulNBits_2_0_302.const",
      "file_size": 32768
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1069692928,
      "file_name": ".cache\\MatMulNBits_2_0_303.const",
      "file_size": 12582912
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1082275840,
      "file_name": ".cache\\MatMulNBits_2_0_304.const",
      "file_size": 786432
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1083062272,
      "file_name": ".cache\\MatMulNBits_2_0_305.const",
      "file_size": 98304
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1083160576,
      "file_name": ".cache\\MatMulNBits_2_0_306.const",
      "file_size": 32768
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1083193344,
      "file_name": ".cache\\MatMulNBits_2_0_307.const",
      "file_size": 25165824
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1108359168,
      "file_name": ".cache\\MatMulNBits_2_0_308.const",
      "file_size": 12288
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1108371456,
      "file_name": ".cache\\MatMulNBits_2_0_309.const",
      "file_size": 786432
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1109157888,
      "file_name": ".cache\\MatMulNBits_2_0_310.const",
      "file_size": 196608
    },
    "model.layers.12.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1109354496,
      "file_name": ".cache\\MatMulNBits_2_0_311.const",
      "file_size": 6144
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1109360640,
      "file_name": ".cache\\MatMulNBits_2_0_312.const",
      "file_size": 18874368
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1128235008,
      "file_name": ".cache\\MatMulNBits_2_0_313.const",
      "file_size": 24576
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1128259584,
      "file_name": ".cache\\MatMulNBits_2_0_314.const",
      "file_size": 589824
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1128849408,
      "file_name": ".cache\\MatMulNBits_2_0_315.const",
      "file_size": 147456
    },
    "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1128996864,
      "file_name": ".cache\\MatMulNBits_2_0_316.const",
      "file_size": 9437184
    },
    "model.layers.12.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1138434048,
      "file_name": ".cache\\MatMulNBits_2_0_317.const",
      "file_size": 12288
    },
    "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1138446336,
      "file_name": ".cache\\MatMulNBits_2_0_318.const",
      "file_size": 294912
    },
    "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1138741248,
      "file_name": ".cache\\MatMulNBits_2_0_319.const",
      "file_size": 73728
    },
    "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1138814976,
      "file_name": ".cache\\MatMulNBits_2_0_320.const",
      "file_size": 9437184
    },
    "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1148252160,
      "file_name": ".cache\\MatMulNBits_2_0_321.const",
      "file_size": 12288
    },
    "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1148264448,
      "file_name": ".cache\\MatMulNBits_2_0_322.const",
      "file_size": 294912
    },
    "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1148559360,
      "file_name": ".cache\\MatMulNBits_2_0_323.const",
      "file_size": 73728
    },
    "model.layers.12.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1148633088,
      "file_name": ".cache\\MatMulNBits_2_0_324.const",
      "file_size": 6144
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1148639232,
      "file_name": ".cache\\MatMulNBits_2_0_325.const",
      "file_size": 12582912
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1161222144,
      "file_name": ".cache\\MatMulNBits_2_0_326.const",
      "file_size": 786432
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1162008576,
      "file_name": ".cache\\MatMulNBits_2_0_327.const",
      "file_size": 98304
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1162106880,
      "file_name": ".cache\\MatMulNBits_2_0_328.const",
      "file_size": 32768
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1162139648,
      "file_name": ".cache\\MatMulNBits_2_0_329.const",
      "file_size": 12582912
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1174722560,
      "file_name": ".cache\\MatMulNBits_2_0_330.const",
      "file_size": 786432
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1175508992,
      "file_name": ".cache\\MatMulNBits_2_0_331.const",
      "file_size": 98304
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1175607296,
      "file_name": ".cache\\MatMulNBits_2_0_332.const",
      "file_size": 32768
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1175640064,
      "file_name": ".cache\\MatMulNBits_2_0_333.const",
      "file_size": 25165824
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1200805888,
      "file_name": ".cache\\MatMulNBits_2_0_334.const",
      "file_size": 12288
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1200818176,
      "file_name": ".cache\\MatMulNBits_2_0_335.const",
      "file_size": 786432
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1201604608,
      "file_name": ".cache\\MatMulNBits_2_0_336.const",
      "file_size": 196608
    },
    "model.layers.13.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1201801216,
      "file_name": ".cache\\MatMulNBits_2_0_337.const",
      "file_size": 6144
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1201807360,
      "file_name": ".cache\\MatMulNBits_2_0_338.const",
      "file_size": 18874368
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1220681728,
      "file_name": ".cache\\MatMulNBits_2_0_339.const",
      "file_size": 24576
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1220706304,
      "file_name": ".cache\\MatMulNBits_2_0_340.const",
      "file_size": 589824
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1221296128,
      "file_name": ".cache\\MatMulNBits_2_0_341.const",
      "file_size": 147456
    },
    "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1221443584,
      "file_name": ".cache\\MatMulNBits_2_0_342.const",
      "file_size": 9437184
    },
    "model.layers.13.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1230880768,
      "file_name": ".cache\\MatMulNBits_2_0_343.const",
      "file_size": 12288
    },
    "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1230893056,
      "file_name": ".cache\\MatMulNBits_2_0_344.const",
      "file_size": 294912
    },
    "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1231187968,
      "file_name": ".cache\\MatMulNBits_2_0_345.const",
      "file_size": 73728
    },
    "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1231261696,
      "file_name": ".cache\\MatMulNBits_2_0_346.const",
      "file_size": 9437184
    },
    "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1240698880,
      "file_name": ".cache\\MatMulNBits_2_0_347.const",
      "file_size": 12288
    },
    "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1240711168,
      "file_name": ".cache\\MatMulNBits_2_0_348.const",
      "file_size": 294912
    },
    "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1241006080,
      "file_name": ".cache\\MatMulNBits_2_0_349.const",
      "file_size": 73728
    },
    "model.layers.13.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1241079808,
      "file_name": ".cache\\MatMulNBits_2_0_350.const",
      "file_size": 6144
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1241085952,
      "file_name": ".cache\\MatMulNBits_2_0_351.const",
      "file_size": 12582912
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1253668864,
      "file_name": ".cache\\MatMulNBits_2_0_352.const",
      "file_size": 786432
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1254455296,
      "file_name": ".cache\\MatMulNBits_2_0_353.const",
      "file_size": 98304
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1254553600,
      "file_name": ".cache\\MatMulNBits_2_0_354.const",
      "file_size": 32768
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1254586368,
      "file_name": ".cache\\MatMulNBits_2_0_355.const",
      "file_size": 12582912
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1267169280,
      "file_name": ".cache\\MatMulNBits_2_0_356.const",
      "file_size": 786432
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1267955712,
      "file_name": ".cache\\MatMulNBits_2_0_357.const",
      "file_size": 98304
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1268054016,
      "file_name": ".cache\\MatMulNBits_2_0_358.const",
      "file_size": 32768
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1268086784,
      "file_name": ".cache\\MatMulNBits_2_0_359.const",
      "file_size": 25165824
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1293252608,
      "file_name": ".cache\\MatMulNBits_2_0_360.const",
      "file_size": 12288
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1293264896,
      "file_name": ".cache\\MatMulNBits_2_0_361.const",
      "file_size": 786432
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1294051328,
      "file_name": ".cache\\MatMulNBits_2_0_362.const",
      "file_size": 196608
    },
    "model.layers.14.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1294247936,
      "file_name": ".cache\\MatMulNBits_2_0_363.const",
      "file_size": 6144
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1294254080,
      "file_name": ".cache\\MatMulNBits_2_0_364.const",
      "file_size": 18874368
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1313128448,
      "file_name": ".cache\\MatMulNBits_2_0_365.const",
      "file_size": 24576
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1313153024,
      "file_name": ".cache\\MatMulNBits_2_0_366.const",
      "file_size": 589824
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1313742848,
      "file_name": ".cache\\MatMulNBits_2_0_367.const",
      "file_size": 147456
    },
    "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1313890304,
      "file_name": ".cache\\MatMulNBits_2_0_368.const",
      "file_size": 9437184
    },
    "model.layers.14.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1323327488,
      "file_name": ".cache\\MatMulNBits_2_0_369.const",
      "file_size": 12288
    },
    "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1323339776,
      "file_name": ".cache\\MatMulNBits_2_0_370.const",
      "file_size": 294912
    },
    "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1323634688,
      "file_name": ".cache\\MatMulNBits_2_0_371.const",
      "file_size": 73728
    },
    "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1323708416,
      "file_name": ".cache\\MatMulNBits_2_0_372.const",
      "file_size": 9437184
    },
    "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1333145600,
      "file_name": ".cache\\MatMulNBits_2_0_373.const",
      "file_size": 12288
    },
    "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1333157888,
      "file_name": ".cache\\MatMulNBits_2_0_374.const",
      "file_size": 294912
    },
    "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1333452800,
      "file_name": ".cache\\MatMulNBits_2_0_375.const",
      "file_size": 73728
    },
    "model.layers.14.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1333526528,
      "file_name": ".cache\\MatMulNBits_2_0_376.const",
      "file_size": 6144
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1333532672,
      "file_name": ".cache\\MatMulNBits_2_0_377.const",
      "file_size": 12582912
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1346115584,
      "file_name": ".cache\\MatMulNBits_2_0_378.const",
      "file_size": 786432
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1346902016,
      "file_name": ".cache\\MatMulNBits_2_0_379.const",
      "file_size": 98304
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1347000320,
      "file_name": ".cache\\MatMulNBits_2_0_380.const",
      "file_size": 32768
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1347033088,
      "file_name": ".cache\\MatMulNBits_2_0_381.const",
      "file_size": 12582912
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1359616000,
      "file_name": ".cache\\MatMulNBits_2_0_382.const",
      "file_size": 786432
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1360402432,
      "file_name": ".cache\\MatMulNBits_2_0_383.const",
      "file_size": 98304
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1360500736,
      "file_name": ".cache\\MatMulNBits_2_0_384.const",
      "file_size": 32768
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1360533504,
      "file_name": ".cache\\MatMulNBits_2_0_385.const",
      "file_size": 25165824
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1385699328,
      "file_name": ".cache\\MatMulNBits_2_0_386.const",
      "file_size": 12288
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1385711616,
      "file_name": ".cache\\MatMulNBits_2_0_387.const",
      "file_size": 786432
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1386498048,
      "file_name": ".cache\\MatMulNBits_2_0_388.const",
      "file_size": 196608
    },
    "model.layers.15.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1386694656,
      "file_name": ".cache\\MatMulNBits_2_0_389.const",
      "file_size": 6144
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1386700800,
      "file_name": ".cache\\MatMulNBits_2_0_390.const",
      "file_size": 18874368
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1405575168,
      "file_name": ".cache\\MatMulNBits_2_0_391.const",
      "file_size": 24576
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1405599744,
      "file_name": ".cache\\MatMulNBits_2_0_392.const",
      "file_size": 589824
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1406189568,
      "file_name": ".cache\\MatMulNBits_2_0_393.const",
      "file_size": 147456
    },
    "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1406337024,
      "file_name": ".cache\\MatMulNBits_2_0_394.const",
      "file_size": 9437184
    },
    "model.layers.15.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1415774208,
      "file_name": ".cache\\MatMulNBits_2_0_395.const",
      "file_size": 12288
    },
    "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1415786496,
      "file_name": ".cache\\MatMulNBits_2_0_396.const",
      "file_size": 294912
    },
    "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1416081408,
      "file_name": ".cache\\MatMulNBits_2_0_397.const",
      "file_size": 73728
    },
    "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1416155136,
      "file_name": ".cache\\MatMulNBits_2_0_398.const",
      "file_size": 9437184
    },
    "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1425592320,
      "file_name": ".cache\\MatMulNBits_2_0_399.const",
      "file_size": 12288
    },
    "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1425604608,
      "file_name": ".cache\\MatMulNBits_2_0_400.const",
      "file_size": 294912
    },
    "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1425899520,
      "file_name": ".cache\\MatMulNBits_2_0_401.const",
      "file_size": 73728
    },
    "model.layers.15.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1425973248,
      "file_name": ".cache\\MatMulNBits_2_0_402.const",
      "file_size": 6144
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1425979392,
      "file_name": ".cache\\MatMulNBits_2_0_403.const",
      "file_size": 12582912
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1438562304,
      "file_name": ".cache\\MatMulNBits_2_0_404.const",
      "file_size": 786432
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1439348736,
      "file_name": ".cache\\MatMulNBits_2_0_405.const",
      "file_size": 98304
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1439447040,
      "file_name": ".cache\\MatMulNBits_2_0_406.const",
      "file_size": 32768
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1439479808,
      "file_name": ".cache\\MatMulNBits_2_0_407.const",
      "file_size": 12582912
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1452062720,
      "file_name": ".cache\\MatMulNBits_2_0_408.const",
      "file_size": 786432
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1452849152,
      "file_name": ".cache\\MatMulNBits_2_0_409.const",
      "file_size": 98304
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1452947456,
      "file_name": ".cache\\MatMulNBits_2_0_410.const",
      "file_size": 32768
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1452980224,
      "file_name": ".cache\\MatMulNBits_2_0_411.const",
      "file_size": 25165824
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1478146048,
      "file_name": ".cache\\MatMulNBits_2_0_412.const",
      "file_size": 12288
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1478158336,
      "file_name": ".cache\\MatMulNBits_2_0_413.const",
      "file_size": 786432
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1478944768,
      "file_name": ".cache\\MatMulNBits_2_0_414.const",
      "file_size": 196608
    },
    "model.layers.16.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1479141376,
      "file_name": ".cache\\MatMulNBits_2_0_415.const",
      "file_size": 6144
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1479147520,
      "file_name": ".cache\\MatMulNBits_2_0_416.const",
      "file_size": 18874368
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1498021888,
      "file_name": ".cache\\MatMulNBits_2_0_417.const",
      "file_size": 24576
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1498046464,
      "file_name": ".cache\\MatMulNBits_2_0_418.const",
      "file_size": 589824
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1498636288,
      "file_name": ".cache\\MatMulNBits_2_0_419.const",
      "file_size": 147456
    },
    "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1498783744,
      "file_name": ".cache\\MatMulNBits_2_0_420.const",
      "file_size": 9437184
    },
    "model.layers.16.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1508220928,
      "file_name": ".cache\\MatMulNBits_2_0_421.const",
      "file_size": 12288
    },
    "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1508233216,
      "file_name": ".cache\\MatMulNBits_2_0_422.const",
      "file_size": 294912
    },
    "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1508528128,
      "file_name": ".cache\\MatMulNBits_2_0_423.const",
      "file_size": 73728
    },
    "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1508601856,
      "file_name": ".cache\\MatMulNBits_2_0_424.const",
      "file_size": 9437184
    },
    "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1518039040,
      "file_name": ".cache\\MatMulNBits_2_0_425.const",
      "file_size": 12288
    },
    "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1518051328,
      "file_name": ".cache\\MatMulNBits_2_0_426.const",
      "file_size": 294912
    },
    "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1518346240,
      "file_name": ".cache\\MatMulNBits_2_0_427.const",
      "file_size": 73728
    },
    "model.layers.16.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1518419968,
      "file_name": ".cache\\MatMulNBits_2_0_428.const",
      "file_size": 6144
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1518426112,
      "file_name": ".cache\\MatMulNBits_2_0_429.const",
      "file_size": 12582912
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1531009024,
      "file_name": ".cache\\MatMulNBits_2_0_430.const",
      "file_size": 786432
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1531795456,
      "file_name": ".cache\\MatMulNBits_2_0_431.const",
      "file_size": 98304
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1531893760,
      "file_name": ".cache\\MatMulNBits_2_0_432.const",
      "file_size": 32768
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1531926528,
      "file_name": ".cache\\MatMulNBits_2_0_433.const",
      "file_size": 12582912
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1544509440,
      "file_name": ".cache\\MatMulNBits_2_0_434.const",
      "file_size": 786432
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1545295872,
      "file_name": ".cache\\MatMulNBits_2_0_435.const",
      "file_size": 98304
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1545394176,
      "file_name": ".cache\\MatMulNBits_2_0_436.const",
      "file_size": 32768
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1545426944,
      "file_name": ".cache\\MatMulNBits_2_0_437.const",
      "file_size": 25165824
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1570592768,
      "file_name": ".cache\\MatMulNBits_2_0_438.const",
      "file_size": 12288
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1570605056,
      "file_name": ".cache\\MatMulNBits_2_0_439.const",
      "file_size": 786432
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1571391488,
      "file_name": ".cache\\MatMulNBits_2_0_440.const",
      "file_size": 196608
    },
    "model.layers.17.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1571588096,
      "file_name": ".cache\\MatMulNBits_2_0_441.const",
      "file_size": 6144
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1571594240,
      "file_name": ".cache\\MatMulNBits_2_0_442.const",
      "file_size": 18874368
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1590468608,
      "file_name": ".cache\\MatMulNBits_2_0_443.const",
      "file_size": 24576
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1590493184,
      "file_name": ".cache\\MatMulNBits_2_0_444.const",
      "file_size": 589824
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1591083008,
      "file_name": ".cache\\MatMulNBits_2_0_445.const",
      "file_size": 147456
    },
    "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1591230464,
      "file_name": ".cache\\MatMulNBits_2_0_446.const",
      "file_size": 9437184
    },
    "model.layers.17.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1600667648,
      "file_name": ".cache\\MatMulNBits_2_0_447.const",
      "file_size": 12288
    },
    "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1600679936,
      "file_name": ".cache\\MatMulNBits_2_0_448.const",
      "file_size": 294912
    },
    "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1600974848,
      "file_name": ".cache\\MatMulNBits_2_0_449.const",
      "file_size": 73728
    },
    "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1601048576,
      "file_name": ".cache\\MatMulNBits_2_0_450.const",
      "file_size": 9437184
    },
    "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1610485760,
      "file_name": ".cache\\MatMulNBits_2_0_451.const",
      "file_size": 12288
    },
    "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1610498048,
      "file_name": ".cache\\MatMulNBits_2_0_452.const",
      "file_size": 294912
    },
    "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1610792960,
      "file_name": ".cache\\MatMulNBits_2_0_453.const",
      "file_size": 73728
    },
    "model.layers.17.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1610866688,
      "file_name": ".cache\\MatMulNBits_2_0_454.const",
      "file_size": 6144
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1610872832,
      "file_name": ".cache\\MatMulNBits_2_0_455.const",
      "file_size": 12582912
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1623455744,
      "file_name": ".cache\\MatMulNBits_2_0_456.const",
      "file_size": 786432
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1624242176,
      "file_name": ".cache\\MatMulNBits_2_0_457.const",
      "file_size": 98304
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1624340480,
      "file_name": ".cache\\MatMulNBits_2_0_458.const",
      "file_size": 32768
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1624373248,
      "file_name": ".cache\\MatMulNBits_2_0_459.const",
      "file_size": 12582912
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1636956160,
      "file_name": ".cache\\MatMulNBits_2_0_460.const",
      "file_size": 786432
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1637742592,
      "file_name": ".cache\\MatMulNBits_2_0_461.const",
      "file_size": 98304
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1637840896,
      "file_name": ".cache\\MatMulNBits_2_0_462.const",
      "file_size": 32768
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1637873664,
      "file_name": ".cache\\MatMulNBits_2_0_463.const",
      "file_size": 25165824
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1663039488,
      "file_name": ".cache\\MatMulNBits_2_0_464.const",
      "file_size": 12288
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1663051776,
      "file_name": ".cache\\MatMulNBits_2_0_465.const",
      "file_size": 786432
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1663838208,
      "file_name": ".cache\\MatMulNBits_2_0_466.const",
      "file_size": 196608
    },
    "model.layers.18.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1664034816,
      "file_name": ".cache\\MatMulNBits_2_0_467.const",
      "file_size": 6144
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1664040960,
      "file_name": ".cache\\MatMulNBits_2_0_468.const",
      "file_size": 18874368
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1682915328,
      "file_name": ".cache\\MatMulNBits_2_0_469.const",
      "file_size": 24576
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1682939904,
      "file_name": ".cache\\MatMulNBits_2_0_470.const",
      "file_size": 589824
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1683529728,
      "file_name": ".cache\\MatMulNBits_2_0_471.const",
      "file_size": 147456
    },
    "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1683677184,
      "file_name": ".cache\\MatMulNBits_2_0_472.const",
      "file_size": 9437184
    },
    "model.layers.18.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1693114368,
      "file_name": ".cache\\MatMulNBits_2_0_473.const",
      "file_size": 12288
    },
    "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1693126656,
      "file_name": ".cache\\MatMulNBits_2_0_474.const",
      "file_size": 294912
    },
    "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1693421568,
      "file_name": ".cache\\MatMulNBits_2_0_475.const",
      "file_size": 73728
    },
    "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1693495296,
      "file_name": ".cache\\MatMulNBits_2_0_476.const",
      "file_size": 9437184
    },
    "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1702932480,
      "file_name": ".cache\\MatMulNBits_2_0_477.const",
      "file_size": 12288
    },
    "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1702944768,
      "file_name": ".cache\\MatMulNBits_2_0_478.const",
      "file_size": 294912
    },
    "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1703239680,
      "file_name": ".cache\\MatMulNBits_2_0_479.const",
      "file_size": 73728
    },
    "model.layers.18.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1703313408,
      "file_name": ".cache\\MatMulNBits_2_0_480.const",
      "file_size": 6144
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1703319552,
      "file_name": ".cache\\MatMulNBits_2_0_481.const",
      "file_size": 12582912
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1715902464,
      "file_name": ".cache\\MatMulNBits_2_0_482.const",
      "file_size": 786432
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1716688896,
      "file_name": ".cache\\MatMulNBits_2_0_483.const",
      "file_size": 98304
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1716787200,
      "file_name": ".cache\\MatMulNBits_2_0_484.const",
      "file_size": 32768
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1716819968,
      "file_name": ".cache\\MatMulNBits_2_0_485.const",
      "file_size": 12582912
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1729402880,
      "file_name": ".cache\\MatMulNBits_2_0_486.const",
      "file_size": 786432
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1730189312,
      "file_name": ".cache\\MatMulNBits_2_0_487.const",
      "file_size": 98304
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1730287616,
      "file_name": ".cache\\MatMulNBits_2_0_488.const",
      "file_size": 32768
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1730320384,
      "file_name": ".cache\\MatMulNBits_2_0_489.const",
      "file_size": 25165824
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1755486208,
      "file_name": ".cache\\MatMulNBits_2_0_490.const",
      "file_size": 12288
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1755498496,
      "file_name": ".cache\\MatMulNBits_2_0_491.const",
      "file_size": 786432
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1756284928,
      "file_name": ".cache\\MatMulNBits_2_0_492.const",
      "file_size": 196608
    },
    "model.layers.19.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1756481536,
      "file_name": ".cache\\MatMulNBits_2_0_493.const",
      "file_size": 6144
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1756487680,
      "file_name": ".cache\\MatMulNBits_2_0_494.const",
      "file_size": 18874368
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1775362048,
      "file_name": ".cache\\MatMulNBits_2_0_495.const",
      "file_size": 24576
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1775386624,
      "file_name": ".cache\\MatMulNBits_2_0_496.const",
      "file_size": 589824
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1775976448,
      "file_name": ".cache\\MatMulNBits_2_0_497.const",
      "file_size": 147456
    },
    "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1776123904,
      "file_name": ".cache\\MatMulNBits_2_0_498.const",
      "file_size": 9437184
    },
    "model.layers.19.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1785561088,
      "file_name": ".cache\\MatMulNBits_2_0_499.const",
      "file_size": 12288
    },
    "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1785573376,
      "file_name": ".cache\\MatMulNBits_2_0_500.const",
      "file_size": 294912
    },
    "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1785868288,
      "file_name": ".cache\\MatMulNBits_2_0_501.const",
      "file_size": 73728
    },
    "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1785942016,
      "file_name": ".cache\\MatMulNBits_2_0_502.const",
      "file_size": 9437184
    },
    "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1795379200,
      "file_name": ".cache\\MatMulNBits_2_0_503.const",
      "file_size": 12288
    },
    "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1795391488,
      "file_name": ".cache\\MatMulNBits_2_0_504.const",
      "file_size": 294912
    },
    "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1795686400,
      "file_name": ".cache\\MatMulNBits_2_0_505.const",
      "file_size": 73728
    },
    "model.layers.19.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1795760128,
      "file_name": ".cache\\MatMulNBits_2_0_506.const",
      "file_size": 6144
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1795766272,
      "file_name": ".cache\\MatMulNBits_2_0_507.const",
      "file_size": 12582912
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1808349184,
      "file_name": ".cache\\MatMulNBits_2_0_508.const",
      "file_size": 786432
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1809135616,
      "file_name": ".cache\\MatMulNBits_2_0_509.const",
      "file_size": 98304
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1809233920,
      "file_name": ".cache\\MatMulNBits_2_0_510.const",
      "file_size": 32768
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1809266688,
      "file_name": ".cache\\MatMulNBits_2_0_511.const",
      "file_size": 12582912
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1821849600,
      "file_name": ".cache\\MatMulNBits_2_0_512.const",
      "file_size": 786432
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1822636032,
      "file_name": ".cache\\MatMulNBits_2_0_513.const",
      "file_size": 98304
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1822734336,
      "file_name": ".cache\\MatMulNBits_2_0_514.const",
      "file_size": 32768
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1822767104,
      "file_name": ".cache\\MatMulNBits_2_0_515.const",
      "file_size": 25165824
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1847932928,
      "file_name": ".cache\\MatMulNBits_2_0_516.const",
      "file_size": 12288
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1847945216,
      "file_name": ".cache\\MatMulNBits_2_0_517.const",
      "file_size": 786432
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1848731648,
      "file_name": ".cache\\MatMulNBits_2_0_518.const",
      "file_size": 196608
    },
    "model.layers.20.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1848928256,
      "file_name": ".cache\\MatMulNBits_2_0_519.const",
      "file_size": 6144
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1848934400,
      "file_name": ".cache\\MatMulNBits_2_0_520.const",
      "file_size": 18874368
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1867808768,
      "file_name": ".cache\\MatMulNBits_2_0_521.const",
      "file_size": 24576
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1867833344,
      "file_name": ".cache\\MatMulNBits_2_0_522.const",
      "file_size": 589824
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1868423168,
      "file_name": ".cache\\MatMulNBits_2_0_523.const",
      "file_size": 147456
    },
    "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1868570624,
      "file_name": ".cache\\MatMulNBits_2_0_524.const",
      "file_size": 9437184
    },
    "model.layers.20.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1878007808,
      "file_name": ".cache\\MatMulNBits_2_0_525.const",
      "file_size": 12288
    },
    "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1878020096,
      "file_name": ".cache\\MatMulNBits_2_0_526.const",
      "file_size": 294912
    },
    "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1878315008,
      "file_name": ".cache\\MatMulNBits_2_0_527.const",
      "file_size": 73728
    },
    "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1878388736,
      "file_name": ".cache\\MatMulNBits_2_0_528.const",
      "file_size": 9437184
    },
    "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1887825920,
      "file_name": ".cache\\MatMulNBits_2_0_529.const",
      "file_size": 12288
    },
    "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1887838208,
      "file_name": ".cache\\MatMulNBits_2_0_530.const",
      "file_size": 294912
    },
    "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1888133120,
      "file_name": ".cache\\MatMulNBits_2_0_531.const",
      "file_size": 73728
    },
    "model.layers.20.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1888206848,
      "file_name": ".cache\\MatMulNBits_2_0_532.const",
      "file_size": 6144
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1888212992,
      "file_name": ".cache\\MatMulNBits_2_0_533.const",
      "file_size": 12582912
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1900795904,
      "file_name": ".cache\\MatMulNBits_2_0_534.const",
      "file_size": 786432
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1901582336,
      "file_name": ".cache\\MatMulNBits_2_0_535.const",
      "file_size": 98304
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1901680640,
      "file_name": ".cache\\MatMulNBits_2_0_536.const",
      "file_size": 32768
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1901713408,
      "file_name": ".cache\\MatMulNBits_2_0_537.const",
      "file_size": 12582912
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1914296320,
      "file_name": ".cache\\MatMulNBits_2_0_538.const",
      "file_size": 786432
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1915082752,
      "file_name": ".cache\\MatMulNBits_2_0_539.const",
      "file_size": 98304
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1915181056,
      "file_name": ".cache\\MatMulNBits_2_0_540.const",
      "file_size": 32768
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1915213824,
      "file_name": ".cache\\MatMulNBits_2_0_541.const",
      "file_size": 25165824
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1940379648,
      "file_name": ".cache\\MatMulNBits_2_0_542.const",
      "file_size": 12288
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1940391936,
      "file_name": ".cache\\MatMulNBits_2_0_543.const",
      "file_size": 786432
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 1941178368,
      "file_name": ".cache\\MatMulNBits_2_0_544.const",
      "file_size": 196608
    },
    "model.layers.21.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1941374976,
      "file_name": ".cache\\MatMulNBits_2_0_545.const",
      "file_size": 6144
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 1941381120,
      "file_name": ".cache\\MatMulNBits_2_0_546.const",
      "file_size": 18874368
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 1960255488,
      "file_name": ".cache\\MatMulNBits_2_0_547.const",
      "file_size": 24576
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 1960280064,
      "file_name": ".cache\\MatMulNBits_2_0_548.const",
      "file_size": 589824
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 1960869888,
      "file_name": ".cache\\MatMulNBits_2_0_549.const",
      "file_size": 147456
    },
    "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1961017344,
      "file_name": ".cache\\MatMulNBits_2_0_550.const",
      "file_size": 9437184
    },
    "model.layers.21.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1970454528,
      "file_name": ".cache\\MatMulNBits_2_0_551.const",
      "file_size": 12288
    },
    "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1970466816,
      "file_name": ".cache\\MatMulNBits_2_0_552.const",
      "file_size": 294912
    },
    "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1970761728,
      "file_name": ".cache\\MatMulNBits_2_0_553.const",
      "file_size": 73728
    },
    "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 1970835456,
      "file_name": ".cache\\MatMulNBits_2_0_554.const",
      "file_size": 9437184
    },
    "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 1980272640,
      "file_name": ".cache\\MatMulNBits_2_0_555.const",
      "file_size": 12288
    },
    "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 1980284928,
      "file_name": ".cache\\MatMulNBits_2_0_556.const",
      "file_size": 294912
    },
    "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 1980579840,
      "file_name": ".cache\\MatMulNBits_2_0_557.const",
      "file_size": 73728
    },
    "model.layers.21.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 1980653568,
      "file_name": ".cache\\MatMulNBits_2_0_558.const",
      "file_size": 6144
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1980659712,
      "file_name": ".cache\\MatMulNBits_2_0_559.const",
      "file_size": 12582912
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 1993242624,
      "file_name": ".cache\\MatMulNBits_2_0_560.const",
      "file_size": 786432
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 1994029056,
      "file_name": ".cache\\MatMulNBits_2_0_561.const",
      "file_size": 98304
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 1994127360,
      "file_name": ".cache\\MatMulNBits_2_0_562.const",
      "file_size": 32768
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 1994160128,
      "file_name": ".cache\\MatMulNBits_2_0_563.const",
      "file_size": 12582912
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2006743040,
      "file_name": ".cache\\MatMulNBits_2_0_564.const",
      "file_size": 786432
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2007529472,
      "file_name": ".cache\\MatMulNBits_2_0_565.const",
      "file_size": 98304
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2007627776,
      "file_name": ".cache\\MatMulNBits_2_0_566.const",
      "file_size": 32768
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2007660544,
      "file_name": ".cache\\MatMulNBits_2_0_567.const",
      "file_size": 25165824
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2032826368,
      "file_name": ".cache\\MatMulNBits_2_0_568.const",
      "file_size": 12288
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2032838656,
      "file_name": ".cache\\MatMulNBits_2_0_569.const",
      "file_size": 786432
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2033625088,
      "file_name": ".cache\\MatMulNBits_2_0_570.const",
      "file_size": 196608
    },
    "model.layers.22.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2033821696,
      "file_name": ".cache\\MatMulNBits_2_0_571.const",
      "file_size": 6144
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2033827840,
      "file_name": ".cache\\MatMulNBits_2_0_572.const",
      "file_size": 18874368
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2052702208,
      "file_name": ".cache\\MatMulNBits_2_0_573.const",
      "file_size": 24576
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2052726784,
      "file_name": ".cache\\MatMulNBits_2_0_574.const",
      "file_size": 589824
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2053316608,
      "file_name": ".cache\\MatMulNBits_2_0_575.const",
      "file_size": 147456
    },
    "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2053464064,
      "file_name": ".cache\\MatMulNBits_2_0_576.const",
      "file_size": 9437184
    },
    "model.layers.22.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2062901248,
      "file_name": ".cache\\MatMulNBits_2_0_577.const",
      "file_size": 12288
    },
    "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2062913536,
      "file_name": ".cache\\MatMulNBits_2_0_578.const",
      "file_size": 294912
    },
    "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2063208448,
      "file_name": ".cache\\MatMulNBits_2_0_579.const",
      "file_size": 73728
    },
    "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2063282176,
      "file_name": ".cache\\MatMulNBits_2_0_580.const",
      "file_size": 9437184
    },
    "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2072719360,
      "file_name": ".cache\\MatMulNBits_2_0_581.const",
      "file_size": 12288
    },
    "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2072731648,
      "file_name": ".cache\\MatMulNBits_2_0_582.const",
      "file_size": 294912
    },
    "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2073026560,
      "file_name": ".cache\\MatMulNBits_2_0_583.const",
      "file_size": 73728
    },
    "model.layers.22.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2073100288,
      "file_name": ".cache\\MatMulNBits_2_0_584.const",
      "file_size": 6144
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2073106432,
      "file_name": ".cache\\MatMulNBits_2_0_585.const",
      "file_size": 12582912
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2085689344,
      "file_name": ".cache\\MatMulNBits_2_0_586.const",
      "file_size": 786432
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2086475776,
      "file_name": ".cache\\MatMulNBits_2_0_587.const",
      "file_size": 98304
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2086574080,
      "file_name": ".cache\\MatMulNBits_2_0_588.const",
      "file_size": 32768
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2086606848,
      "file_name": ".cache\\MatMulNBits_2_0_589.const",
      "file_size": 12582912
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2099189760,
      "file_name": ".cache\\MatMulNBits_2_0_590.const",
      "file_size": 786432
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2099976192,
      "file_name": ".cache\\MatMulNBits_2_0_591.const",
      "file_size": 98304
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2100074496,
      "file_name": ".cache\\MatMulNBits_2_0_592.const",
      "file_size": 32768
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2100107264,
      "file_name": ".cache\\MatMulNBits_2_0_593.const",
      "file_size": 25165824
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2125273088,
      "file_name": ".cache\\MatMulNBits_2_0_594.const",
      "file_size": 12288
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2125285376,
      "file_name": ".cache\\MatMulNBits_2_0_595.const",
      "file_size": 786432
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2126071808,
      "file_name": ".cache\\MatMulNBits_2_0_596.const",
      "file_size": 196608
    },
    "model.layers.23.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2126268416,
      "file_name": ".cache\\MatMulNBits_2_0_597.const",
      "file_size": 6144
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2126274560,
      "file_name": ".cache\\MatMulNBits_2_0_598.const",
      "file_size": 18874368
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2145148928,
      "file_name": ".cache\\MatMulNBits_2_0_599.const",
      "file_size": 24576
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2145173504,
      "file_name": ".cache\\MatMulNBits_2_0_600.const",
      "file_size": 589824
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2145763328,
      "file_name": ".cache\\MatMulNBits_2_0_601.const",
      "file_size": 147456
    },
    "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2145910784,
      "file_name": ".cache\\MatMulNBits_2_0_602.const",
      "file_size": 9437184
    },
    "model.layers.23.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2155347968,
      "file_name": ".cache\\MatMulNBits_2_0_603.const",
      "file_size": 12288
    },
    "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2155360256,
      "file_name": ".cache\\MatMulNBits_2_0_604.const",
      "file_size": 294912
    },
    "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2155655168,
      "file_name": ".cache\\MatMulNBits_2_0_605.const",
      "file_size": 73728
    },
    "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2155728896,
      "file_name": ".cache\\MatMulNBits_2_0_606.const",
      "file_size": 9437184
    },
    "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2165166080,
      "file_name": ".cache\\MatMulNBits_2_0_607.const",
      "file_size": 12288
    },
    "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2165178368,
      "file_name": ".cache\\MatMulNBits_2_0_608.const",
      "file_size": 294912
    },
    "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2165473280,
      "file_name": ".cache\\MatMulNBits_2_0_609.const",
      "file_size": 73728
    },
    "model.layers.23.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2165547008,
      "file_name": ".cache\\MatMulNBits_2_0_610.const",
      "file_size": 6144
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2165553152,
      "file_name": ".cache\\MatMulNBits_2_0_611.const",
      "file_size": 12582912
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2178136064,
      "file_name": ".cache\\MatMulNBits_2_0_612.const",
      "file_size": 786432
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2178922496,
      "file_name": ".cache\\MatMulNBits_2_0_613.const",
      "file_size": 98304
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2179020800,
      "file_name": ".cache\\MatMulNBits_2_0_614.const",
      "file_size": 32768
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2179053568,
      "file_name": ".cache\\MatMulNBits_2_0_615.const",
      "file_size": 12582912
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2191636480,
      "file_name": ".cache\\MatMulNBits_2_0_616.const",
      "file_size": 786432
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2192422912,
      "file_name": ".cache\\MatMulNBits_2_0_617.const",
      "file_size": 98304
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2192521216,
      "file_name": ".cache\\MatMulNBits_2_0_618.const",
      "file_size": 32768
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2192553984,
      "file_name": ".cache\\MatMulNBits_2_0_619.const",
      "file_size": 25165824
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2217719808,
      "file_name": ".cache\\MatMulNBits_2_0_620.const",
      "file_size": 12288
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2217732096,
      "file_name": ".cache\\MatMulNBits_2_0_621.const",
      "file_size": 786432
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2218518528,
      "file_name": ".cache\\MatMulNBits_2_0_622.const",
      "file_size": 196608
    },
    "model.layers.24.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2218715136,
      "file_name": ".cache\\MatMulNBits_2_0_623.const",
      "file_size": 6144
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2218721280,
      "file_name": ".cache\\MatMulNBits_2_0_624.const",
      "file_size": 18874368
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2237595648,
      "file_name": ".cache\\MatMulNBits_2_0_625.const",
      "file_size": 24576
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2237620224,
      "file_name": ".cache\\MatMulNBits_2_0_626.const",
      "file_size": 589824
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2238210048,
      "file_name": ".cache\\MatMulNBits_2_0_627.const",
      "file_size": 147456
    },
    "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2238357504,
      "file_name": ".cache\\MatMulNBits_2_0_628.const",
      "file_size": 9437184
    },
    "model.layers.24.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2247794688,
      "file_name": ".cache\\MatMulNBits_2_0_629.const",
      "file_size": 12288
    },
    "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2247806976,
      "file_name": ".cache\\MatMulNBits_2_0_630.const",
      "file_size": 294912
    },
    "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2248101888,
      "file_name": ".cache\\MatMulNBits_2_0_631.const",
      "file_size": 73728
    },
    "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2248175616,
      "file_name": ".cache\\MatMulNBits_2_0_632.const",
      "file_size": 9437184
    },
    "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2257612800,
      "file_name": ".cache\\MatMulNBits_2_0_633.const",
      "file_size": 12288
    },
    "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2257625088,
      "file_name": ".cache\\MatMulNBits_2_0_634.const",
      "file_size": 294912
    },
    "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2257920000,
      "file_name": ".cache\\MatMulNBits_2_0_635.const",
      "file_size": 73728
    },
    "model.layers.24.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2257993728,
      "file_name": ".cache\\MatMulNBits_2_0_636.const",
      "file_size": 6144
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2257999872,
      "file_name": ".cache\\MatMulNBits_2_0_637.const",
      "file_size": 12582912
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2270582784,
      "file_name": ".cache\\MatMulNBits_2_0_638.const",
      "file_size": 786432
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2271369216,
      "file_name": ".cache\\MatMulNBits_2_0_639.const",
      "file_size": 98304
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2271467520,
      "file_name": ".cache\\MatMulNBits_2_0_640.const",
      "file_size": 32768
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2271500288,
      "file_name": ".cache\\MatMulNBits_2_0_641.const",
      "file_size": 12582912
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2284083200,
      "file_name": ".cache\\MatMulNBits_2_0_642.const",
      "file_size": 786432
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2284869632,
      "file_name": ".cache\\MatMulNBits_2_0_643.const",
      "file_size": 98304
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2284967936,
      "file_name": ".cache\\MatMulNBits_2_0_644.const",
      "file_size": 32768
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2285000704,
      "file_name": ".cache\\MatMulNBits_2_0_645.const",
      "file_size": 25165824
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2310166528,
      "file_name": ".cache\\MatMulNBits_2_0_646.const",
      "file_size": 12288
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2310178816,
      "file_name": ".cache\\MatMulNBits_2_0_647.const",
      "file_size": 786432
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2310965248,
      "file_name": ".cache\\MatMulNBits_2_0_648.const",
      "file_size": 196608
    },
    "model.layers.25.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2311161856,
      "file_name": ".cache\\MatMulNBits_2_0_649.const",
      "file_size": 6144
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2311168000,
      "file_name": ".cache\\MatMulNBits_2_0_650.const",
      "file_size": 18874368
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2330042368,
      "file_name": ".cache\\MatMulNBits_2_0_651.const",
      "file_size": 24576
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2330066944,
      "file_name": ".cache\\MatMulNBits_2_0_652.const",
      "file_size": 589824
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2330656768,
      "file_name": ".cache\\MatMulNBits_2_0_653.const",
      "file_size": 147456
    },
    "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2330804224,
      "file_name": ".cache\\MatMulNBits_2_0_654.const",
      "file_size": 9437184
    },
    "model.layers.25.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2340241408,
      "file_name": ".cache\\MatMulNBits_2_0_655.const",
      "file_size": 12288
    },
    "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2340253696,
      "file_name": ".cache\\MatMulNBits_2_0_656.const",
      "file_size": 294912
    },
    "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2340548608,
      "file_name": ".cache\\MatMulNBits_2_0_657.const",
      "file_size": 73728
    },
    "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2340622336,
      "file_name": ".cache\\MatMulNBits_2_0_658.const",
      "file_size": 9437184
    },
    "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2350059520,
      "file_name": ".cache\\MatMulNBits_2_0_659.const",
      "file_size": 12288
    },
    "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2350071808,
      "file_name": ".cache\\MatMulNBits_2_0_660.const",
      "file_size": 294912
    },
    "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2350366720,
      "file_name": ".cache\\MatMulNBits_2_0_661.const",
      "file_size": 73728
    },
    "model.layers.25.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2350440448,
      "file_name": ".cache\\MatMulNBits_2_0_662.const",
      "file_size": 6144
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2350446592,
      "file_name": ".cache\\MatMulNBits_2_0_663.const",
      "file_size": 12582912
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2363029504,
      "file_name": ".cache\\MatMulNBits_2_0_664.const",
      "file_size": 786432
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2363815936,
      "file_name": ".cache\\MatMulNBits_2_0_665.const",
      "file_size": 98304
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2363914240,
      "file_name": ".cache\\MatMulNBits_2_0_666.const",
      "file_size": 32768
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2363947008,
      "file_name": ".cache\\MatMulNBits_2_0_667.const",
      "file_size": 12582912
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2376529920,
      "file_name": ".cache\\MatMulNBits_2_0_668.const",
      "file_size": 786432
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2377316352,
      "file_name": ".cache\\MatMulNBits_2_0_669.const",
      "file_size": 98304
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2377414656,
      "file_name": ".cache\\MatMulNBits_2_0_670.const",
      "file_size": 32768
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2377447424,
      "file_name": ".cache\\MatMulNBits_2_0_671.const",
      "file_size": 25165824
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2402613248,
      "file_name": ".cache\\MatMulNBits_2_0_672.const",
      "file_size": 12288
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2402625536,
      "file_name": ".cache\\MatMulNBits_2_0_673.const",
      "file_size": 786432
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2403411968,
      "file_name": ".cache\\MatMulNBits_2_0_674.const",
      "file_size": 196608
    },
    "model.layers.26.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2403608576,
      "file_name": ".cache\\MatMulNBits_2_0_675.const",
      "file_size": 6144
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2403614720,
      "file_name": ".cache\\MatMulNBits_2_0_676.const",
      "file_size": 18874368
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2422489088,
      "file_name": ".cache\\MatMulNBits_2_0_677.const",
      "file_size": 24576
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2422513664,
      "file_name": ".cache\\MatMulNBits_2_0_678.const",
      "file_size": 589824
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2423103488,
      "file_name": ".cache\\MatMulNBits_2_0_679.const",
      "file_size": 147456
    },
    "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2423250944,
      "file_name": ".cache\\MatMulNBits_2_0_680.const",
      "file_size": 9437184
    },
    "model.layers.26.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2432688128,
      "file_name": ".cache\\MatMulNBits_2_0_681.const",
      "file_size": 12288
    },
    "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2432700416,
      "file_name": ".cache\\MatMulNBits_2_0_682.const",
      "file_size": 294912
    },
    "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2432995328,
      "file_name": ".cache\\MatMulNBits_2_0_683.const",
      "file_size": 73728
    },
    "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2433069056,
      "file_name": ".cache\\MatMulNBits_2_0_684.const",
      "file_size": 9437184
    },
    "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2442506240,
      "file_name": ".cache\\MatMulNBits_2_0_685.const",
      "file_size": 12288
    },
    "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2442518528,
      "file_name": ".cache\\MatMulNBits_2_0_686.const",
      "file_size": 294912
    },
    "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2442813440,
      "file_name": ".cache\\MatMulNBits_2_0_687.const",
      "file_size": 73728
    },
    "model.layers.26.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2442887168,
      "file_name": ".cache\\MatMulNBits_2_0_688.const",
      "file_size": 6144
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2442893312,
      "file_name": ".cache\\MatMulNBits_2_0_689.const",
      "file_size": 12582912
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2455476224,
      "file_name": ".cache\\MatMulNBits_2_0_690.const",
      "file_size": 786432
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2456262656,
      "file_name": ".cache\\MatMulNBits_2_0_691.const",
      "file_size": 98304
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2456360960,
      "file_name": ".cache\\MatMulNBits_2_0_692.const",
      "file_size": 32768
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2456393728,
      "file_name": ".cache\\MatMulNBits_2_0_693.const",
      "file_size": 12582912
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2468976640,
      "file_name": ".cache\\MatMulNBits_2_0_694.const",
      "file_size": 786432
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2469763072,
      "file_name": ".cache\\MatMulNBits_2_0_695.const",
      "file_size": 98304
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2469861376,
      "file_name": ".cache\\MatMulNBits_2_0_696.const",
      "file_size": 32768
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2469894144,
      "file_name": ".cache\\MatMulNBits_2_0_697.const",
      "file_size": 25165824
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2495059968,
      "file_name": ".cache\\MatMulNBits_2_0_698.const",
      "file_size": 12288
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2495072256,
      "file_name": ".cache\\MatMulNBits_2_0_699.const",
      "file_size": 786432
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2495858688,
      "file_name": ".cache\\MatMulNBits_2_0_700.const",
      "file_size": 196608
    },
    "model.layers.27.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2496055296,
      "file_name": ".cache\\MatMulNBits_2_0_701.const",
      "file_size": 6144
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2496061440,
      "file_name": ".cache\\MatMulNBits_2_0_702.const",
      "file_size": 18874368
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2514935808,
      "file_name": ".cache\\MatMulNBits_2_0_703.const",
      "file_size": 24576
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2514960384,
      "file_name": ".cache\\MatMulNBits_2_0_704.const",
      "file_size": 589824
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2515550208,
      "file_name": ".cache\\MatMulNBits_2_0_705.const",
      "file_size": 147456
    },
    "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2515697664,
      "file_name": ".cache\\MatMulNBits_2_0_706.const",
      "file_size": 9437184
    },
    "model.layers.27.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2525134848,
      "file_name": ".cache\\MatMulNBits_2_0_707.const",
      "file_size": 12288
    },
    "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2525147136,
      "file_name": ".cache\\MatMulNBits_2_0_708.const",
      "file_size": 294912
    },
    "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2525442048,
      "file_name": ".cache\\MatMulNBits_2_0_709.const",
      "file_size": 73728
    },
    "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2525515776,
      "file_name": ".cache\\MatMulNBits_2_0_710.const",
      "file_size": 9437184
    },
    "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2534952960,
      "file_name": ".cache\\MatMulNBits_2_0_711.const",
      "file_size": 12288
    },
    "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2534965248,
      "file_name": ".cache\\MatMulNBits_2_0_712.const",
      "file_size": 294912
    },
    "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2535260160,
      "file_name": ".cache\\MatMulNBits_2_0_713.const",
      "file_size": 73728
    },
    "model.layers.27.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2535333888,
      "file_name": ".cache\\MatMulNBits_2_0_714.const",
      "file_size": 6144
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2535340032,
      "file_name": ".cache\\MatMulNBits_2_0_715.const",
      "file_size": 12582912
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2547922944,
      "file_name": ".cache\\MatMulNBits_2_0_716.const",
      "file_size": 786432
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2548709376,
      "file_name": ".cache\\MatMulNBits_2_0_717.const",
      "file_size": 98304
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2548807680,
      "file_name": ".cache\\MatMulNBits_2_0_718.const",
      "file_size": 32768
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2548840448,
      "file_name": ".cache\\MatMulNBits_2_0_719.const",
      "file_size": 12582912
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2561423360,
      "file_name": ".cache\\MatMulNBits_2_0_720.const",
      "file_size": 786432
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2562209792,
      "file_name": ".cache\\MatMulNBits_2_0_721.const",
      "file_size": 98304
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2562308096,
      "file_name": ".cache\\MatMulNBits_2_0_722.const",
      "file_size": 32768
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2562340864,
      "file_name": ".cache\\MatMulNBits_2_0_723.const",
      "file_size": 25165824
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2587506688,
      "file_name": ".cache\\MatMulNBits_2_0_724.const",
      "file_size": 12288
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2587518976,
      "file_name": ".cache\\MatMulNBits_2_0_725.const",
      "file_size": 786432
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2588305408,
      "file_name": ".cache\\MatMulNBits_2_0_726.const",
      "file_size": 196608
    },
    "model.layers.28.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2588502016,
      "file_name": ".cache\\MatMulNBits_2_0_727.const",
      "file_size": 6144
    },
    "model.layers.28.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2588508160,
      "file_name": ".cache\\MatMulNBits_2_0_728.const",
      "file_size": 18874368
    },
    "model.layers.28.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2607382528,
      "file_name": ".cache\\MatMulNBits_2_0_729.const",
      "file_size": 24576
    },
    "model.layers.28.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2607407104,
      "file_name": ".cache\\MatMulNBits_2_0_730.const",
      "file_size": 589824
    },
    "model.layers.28.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2607996928,
      "file_name": ".cache\\MatMulNBits_2_0_731.const",
      "file_size": 147456
    },
    "model.layers.28.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2608144384,
      "file_name": ".cache\\MatMulNBits_2_0_732.const",
      "file_size": 9437184
    },
    "model.layers.28.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2617581568,
      "file_name": ".cache\\MatMulNBits_2_0_733.const",
      "file_size": 12288
    },
    "model.layers.28.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2617593856,
      "file_name": ".cache\\MatMulNBits_2_0_734.const",
      "file_size": 294912
    },
    "model.layers.28.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2617888768,
      "file_name": ".cache\\MatMulNBits_2_0_735.const",
      "file_size": 73728
    },
    "model.layers.28.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2617962496,
      "file_name": ".cache\\MatMulNBits_2_0_736.const",
      "file_size": 9437184
    },
    "model.layers.28.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2627399680,
      "file_name": ".cache\\MatMulNBits_2_0_737.const",
      "file_size": 12288
    },
    "model.layers.28.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2627411968,
      "file_name": ".cache\\MatMulNBits_2_0_738.const",
      "file_size": 294912
    },
    "model.layers.28.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2627706880,
      "file_name": ".cache\\MatMulNBits_2_0_739.const",
      "file_size": 73728
    },
    "model.layers.28.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2627780608,
      "file_name": ".cache\\MatMulNBits_2_0_740.const",
      "file_size": 6144
    },
    "model.layers.28.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2627786752,
      "file_name": ".cache\\MatMulNBits_2_0_741.const",
      "file_size": 12582912
    },
    "model.layers.28.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2640369664,
      "file_name": ".cache\\MatMulNBits_2_0_742.const",
      "file_size": 786432
    },
    "model.layers.28.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2641156096,
      "file_name": ".cache\\MatMulNBits_2_0_743.const",
      "file_size": 98304
    },
    "model.layers.28.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2641254400,
      "file_name": ".cache\\MatMulNBits_2_0_744.const",
      "file_size": 32768
    },
    "model.layers.28.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2641287168,
      "file_name": ".cache\\MatMulNBits_2_0_745.const",
      "file_size": 12582912
    },
    "model.layers.28.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2653870080,
      "file_name": ".cache\\MatMulNBits_2_0_746.const",
      "file_size": 786432
    },
    "model.layers.28.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2654656512,
      "file_name": ".cache\\MatMulNBits_2_0_747.const",
      "file_size": 98304
    },
    "model.layers.28.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2654754816,
      "file_name": ".cache\\MatMulNBits_2_0_748.const",
      "file_size": 32768
    },
    "model.layers.28.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2654787584,
      "file_name": ".cache\\MatMulNBits_2_0_749.const",
      "file_size": 25165824
    },
    "model.layers.28.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2679953408,
      "file_name": ".cache\\MatMulNBits_2_0_750.const",
      "file_size": 12288
    },
    "model.layers.28.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2679965696,
      "file_name": ".cache\\MatMulNBits_2_0_751.const",
      "file_size": 786432
    },
    "model.layers.28.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2680752128,
      "file_name": ".cache\\MatMulNBits_2_0_752.const",
      "file_size": 196608
    },
    "model.layers.29.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2680948736,
      "file_name": ".cache\\MatMulNBits_2_0_753.const",
      "file_size": 6144
    },
    "model.layers.29.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2680954880,
      "file_name": ".cache\\MatMulNBits_2_0_754.const",
      "file_size": 18874368
    },
    "model.layers.29.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2699829248,
      "file_name": ".cache\\MatMulNBits_2_0_755.const",
      "file_size": 24576
    },
    "model.layers.29.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2699853824,
      "file_name": ".cache\\MatMulNBits_2_0_756.const",
      "file_size": 589824
    },
    "model.layers.29.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2700443648,
      "file_name": ".cache\\MatMulNBits_2_0_757.const",
      "file_size": 147456
    },
    "model.layers.29.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2700591104,
      "file_name": ".cache\\MatMulNBits_2_0_758.const",
      "file_size": 9437184
    },
    "model.layers.29.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2710028288,
      "file_name": ".cache\\MatMulNBits_2_0_759.const",
      "file_size": 12288
    },
    "model.layers.29.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2710040576,
      "file_name": ".cache\\MatMulNBits_2_0_760.const",
      "file_size": 294912
    },
    "model.layers.29.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2710335488,
      "file_name": ".cache\\MatMulNBits_2_0_761.const",
      "file_size": 73728
    },
    "model.layers.29.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2710409216,
      "file_name": ".cache\\MatMulNBits_2_0_762.const",
      "file_size": 9437184
    },
    "model.layers.29.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2719846400,
      "file_name": ".cache\\MatMulNBits_2_0_763.const",
      "file_size": 12288
    },
    "model.layers.29.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2719858688,
      "file_name": ".cache\\MatMulNBits_2_0_764.const",
      "file_size": 294912
    },
    "model.layers.29.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2720153600,
      "file_name": ".cache\\MatMulNBits_2_0_765.const",
      "file_size": 73728
    },
    "model.layers.29.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2720227328,
      "file_name": ".cache\\MatMulNBits_2_0_766.const",
      "file_size": 6144
    },
    "model.layers.29.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2720233472,
      "file_name": ".cache\\MatMulNBits_2_0_767.const",
      "file_size": 12582912
    },
    "model.layers.29.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2732816384,
      "file_name": ".cache\\MatMulNBits_2_0_768.const",
      "file_size": 786432
    },
    "model.layers.29.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2733602816,
      "file_name": ".cache\\MatMulNBits_2_0_769.const",
      "file_size": 98304
    },
    "model.layers.29.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2733701120,
      "file_name": ".cache\\MatMulNBits_2_0_770.const",
      "file_size": 32768
    },
    "model.layers.29.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2733733888,
      "file_name": ".cache\\MatMulNBits_2_0_771.const",
      "file_size": 12582912
    },
    "model.layers.29.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2746316800,
      "file_name": ".cache\\MatMulNBits_2_0_772.const",
      "file_size": 786432
    },
    "model.layers.29.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2747103232,
      "file_name": ".cache\\MatMulNBits_2_0_773.const",
      "file_size": 98304
    },
    "model.layers.29.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2747201536,
      "file_name": ".cache\\MatMulNBits_2_0_774.const",
      "file_size": 32768
    },
    "model.layers.29.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2747234304,
      "file_name": ".cache\\MatMulNBits_2_0_775.const",
      "file_size": 25165824
    },
    "model.layers.29.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2772400128,
      "file_name": ".cache\\MatMulNBits_2_0_776.const",
      "file_size": 12288
    },
    "model.layers.29.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2772412416,
      "file_name": ".cache\\MatMulNBits_2_0_777.const",
      "file_size": 786432
    },
    "model.layers.29.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2773198848,
      "file_name": ".cache\\MatMulNBits_2_0_778.const",
      "file_size": 196608
    },
    "model.layers.30.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2773395456,
      "file_name": ".cache\\MatMulNBits_2_0_779.const",
      "file_size": 6144
    },
    "model.layers.30.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2773401600,
      "file_name": ".cache\\MatMulNBits_2_0_780.const",
      "file_size": 18874368
    },
    "model.layers.30.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2792275968,
      "file_name": ".cache\\MatMulNBits_2_0_781.const",
      "file_size": 24576
    },
    "model.layers.30.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2792300544,
      "file_name": ".cache\\MatMulNBits_2_0_782.const",
      "file_size": 589824
    },
    "model.layers.30.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2792890368,
      "file_name": ".cache\\MatMulNBits_2_0_783.const",
      "file_size": 147456
    },
    "model.layers.30.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2793037824,
      "file_name": ".cache\\MatMulNBits_2_0_784.const",
      "file_size": 9437184
    },
    "model.layers.30.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2802475008,
      "file_name": ".cache\\MatMulNBits_2_0_785.const",
      "file_size": 12288
    },
    "model.layers.30.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2802487296,
      "file_name": ".cache\\MatMulNBits_2_0_786.const",
      "file_size": 294912
    },
    "model.layers.30.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2802782208,
      "file_name": ".cache\\MatMulNBits_2_0_787.const",
      "file_size": 73728
    },
    "model.layers.30.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2802855936,
      "file_name": ".cache\\MatMulNBits_2_0_788.const",
      "file_size": 9437184
    },
    "model.layers.30.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2812293120,
      "file_name": ".cache\\MatMulNBits_2_0_789.const",
      "file_size": 12288
    },
    "model.layers.30.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2812305408,
      "file_name": ".cache\\MatMulNBits_2_0_790.const",
      "file_size": 294912
    },
    "model.layers.30.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2812600320,
      "file_name": ".cache\\MatMulNBits_2_0_791.const",
      "file_size": 73728
    },
    "model.layers.30.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2812674048,
      "file_name": ".cache\\MatMulNBits_2_0_792.const",
      "file_size": 6144
    },
    "model.layers.30.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2812680192,
      "file_name": ".cache\\MatMulNBits_2_0_793.const",
      "file_size": 12582912
    },
    "model.layers.30.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2825263104,
      "file_name": ".cache\\MatMulNBits_2_0_794.const",
      "file_size": 786432
    },
    "model.layers.30.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2826049536,
      "file_name": ".cache\\MatMulNBits_2_0_795.const",
      "file_size": 98304
    },
    "model.layers.30.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2826147840,
      "file_name": ".cache\\MatMulNBits_2_0_796.const",
      "file_size": 32768
    },
    "model.layers.30.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2826180608,
      "file_name": ".cache\\MatMulNBits_2_0_797.const",
      "file_size": 12582912
    },
    "model.layers.30.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2838763520,
      "file_name": ".cache\\MatMulNBits_2_0_798.const",
      "file_size": 786432
    },
    "model.layers.30.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2839549952,
      "file_name": ".cache\\MatMulNBits_2_0_799.const",
      "file_size": 98304
    },
    "model.layers.30.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2839648256,
      "file_name": ".cache\\MatMulNBits_2_0_800.const",
      "file_size": 32768
    },
    "model.layers.30.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2839681024,
      "file_name": ".cache\\MatMulNBits_2_0_801.const",
      "file_size": 25165824
    },
    "model.layers.30.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2864846848,
      "file_name": ".cache\\MatMulNBits_2_0_802.const",
      "file_size": 12288
    },
    "model.layers.30.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2864859136,
      "file_name": ".cache\\MatMulNBits_2_0_803.const",
      "file_size": 786432
    },
    "model.layers.30.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2865645568,
      "file_name": ".cache\\MatMulNBits_2_0_804.const",
      "file_size": 196608
    },
    "model.layers.31.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2865842176,
      "file_name": ".cache\\MatMulNBits_2_0_805.const",
      "file_size": 6144
    },
    "model.layers.31.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        6144
      ],
      "size_in_bytes": 18874368,
      "op_tensor_size": 18874368,
      "offset": 2865848320,
      "file_name": ".cache\\MatMulNBits_2_0_806.const",
      "file_size": 18874368
    },
    "model.layers.31.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        6144
      ],
      "size_in_bytes": 24576,
      "op_tensor_size": 24576,
      "offset": 2884722688,
      "file_name": ".cache\\MatMulNBits_2_0_807.const",
      "file_size": 24576
    },
    "model.layers.31.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        147456
      ],
      "size_in_bytes": 589824,
      "op_tensor_size": 589824,
      "offset": 2884747264,
      "file_name": ".cache\\MatMulNBits_2_0_808.const",
      "file_size": 589824
    },
    "model.layers.31.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        147456
      ],
      "size_in_bytes": 147456,
      "op_tensor_size": 147456,
      "offset": 2885337088,
      "file_name": ".cache\\MatMulNBits_2_0_809.const",
      "file_size": 147456
    },
    "model.layers.31.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2885484544,
      "file_name": ".cache\\MatMulNBits_2_0_810.const",
      "file_size": 9437184
    },
    "model.layers.31.attn.v_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2894921728,
      "file_name": ".cache\\MatMulNBits_2_0_811.const",
      "file_size": 12288
    },
    "model.layers.31.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2894934016,
      "file_name": ".cache\\MatMulNBits_2_0_812.const",
      "file_size": 294912
    },
    "model.layers.31.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2895228928,
      "file_name": ".cache\\MatMulNBits_2_0_813.const",
      "file_size": 73728
    },
    "model.layers.31.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        3072
      ],
      "size_in_bytes": 9437184,
      "op_tensor_size": 9437184,
      "offset": 2895302656,
      "file_name": ".cache\\MatMulNBits_2_0_814.const",
      "file_size": 9437184
    },
    "model.layers.31.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2904739840,
      "file_name": ".cache\\MatMulNBits_2_0_815.const",
      "file_size": 12288
    },
    "model.layers.31.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        73728
      ],
      "size_in_bytes": 294912,
      "op_tensor_size": 294912,
      "offset": 2904752128,
      "file_name": ".cache\\MatMulNBits_2_0_816.const",
      "file_size": 294912
    },
    "model.layers.31.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        73728
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 2905047040,
      "file_name": ".cache\\MatMulNBits_2_0_817.const",
      "file_size": 73728
    },
    "model.layers.31.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2905120768,
      "file_name": ".cache\\MatMulNBits_2_0_818.const",
      "file_size": 6144
    },
    "model.layers.31.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2905126912,
      "file_name": ".cache\\MatMulNBits_2_0_819.const",
      "file_size": 12582912
    },
    "model.layers.31.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2917709824,
      "file_name": ".cache\\MatMulNBits_2_0_820.const",
      "file_size": 786432
    },
    "model.layers.31.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2918496256,
      "file_name": ".cache\\MatMulNBits_2_0_821.const",
      "file_size": 98304
    },
    "model.layers.31.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2918594560,
      "file_name": ".cache\\MatMulNBits_2_0_822.const",
      "file_size": 32768
    },
    "model.layers.31.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8192,
        24,
        64
      ],
      "size_in_bytes": 12582912,
      "op_tensor_size": 12582912,
      "offset": 2918627328,
      "file_name": ".cache\\MatMulNBits_2_0_823.const",
      "file_size": 12582912
    },
    "model.layers.31.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2931210240,
      "file_name": ".cache\\MatMulNBits_2_0_824.const",
      "file_size": 786432
    },
    "model.layers.31.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        98304
      ],
      "size_in_bytes": 98304,
      "op_tensor_size": 98304,
      "offset": 2931996672,
      "file_name": ".cache\\MatMulNBits_2_0_825.const",
      "file_size": 98304
    },
    "model.layers.31.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8192
      ],
      "size_in_bytes": 32768,
      "op_tensor_size": 32768,
      "offset": 2932094976,
      "file_name": ".cache\\MatMulNBits_2_0_826.const",
      "file_size": 32768
    },
    "model.layers.31.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8192,
        3072
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 2932127744,
      "file_name": ".cache\\MatMulNBits_2_0_827.const",
      "file_size": 25165824
    },
    "model.layers.31.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 2957293568,
      "file_name": ".cache\\MatMulNBits_2_0_828.const",
      "file_size": 12288
    },
    "model.layers.31.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        196608
      ],
      "size_in_bytes": 786432,
      "op_tensor_size": 786432,
      "offset": 2957305856,
      "file_name": ".cache\\MatMulNBits_2_0_829.const",
      "file_size": 786432
    },
    "model.layers.31.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        196608
      ],
      "size_in_bytes": 196608,
      "op_tensor_size": 196608,
      "offset": 2958092288,
      "file_name": ".cache\\MatMulNBits_2_0_830.const",
      "file_size": 196608
    },
    "model.layers.32.final_norm_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        3072
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 2958288896,
      "file_name": ".cache\\MatMulNBits_2_0_831.const",
      "file_size": 6144
    },
    "lm_head.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072,
        32064
      ],
      "size_in_bytes": 98500608,
      "op_tensor_size": 98500608,
      "offset": 2958295040,
      "file_name": ".cache\\MatMulNBits_2_0_832.const",
      "file_size": 98500608
    },
    "lm_head.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        32064
      ],
      "size_in_bytes": 128256,
      "op_tensor_size": 128256,
      "offset": 3056795648,
      "file_name": ".cache\\MatMulNBits_2_0_833.const",
      "file_size": 128256
    },
    "lm_head.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        769536
      ],
      "size_in_bytes": 3078144,
      "op_tensor_size": 3078144,
      "offset": 3056923904,
      "file_name": ".cache\\MatMulNBits_2_0_834.const",
      "file_size": 3078144
    },
    "lm_head.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        769536
      ],
      "size_in_bytes": 769536,
      "op_tensor_size": 769536,
      "offset": 3060002048,
      "file_name": ".cache\\MatMulNBits_2_0_835.const",
      "file_size": 769536
    },
    "past_key_values.0.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 0
    },
    "past_key_values.0.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 25165824
    },
    "present.0.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 0
    },
    "present.0.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 25165824
    },
    "past_key_values.1.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 50331648
    },
    "past_key_values.1.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 75497472
    },
    "present.1.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 50331648
    },
    "present.1.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 75497472
    },
    "past_key_values.2.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 100663296
    },
    "past_key_values.2.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 125829120
    },
    "present.2.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 100663296
    },
    "present.2.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 125829120
    },
    "past_key_values.3.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 150994944
    },
    "past_key_values.3.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 176160768
    },
    "present.3.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 150994944
    },
    "present.3.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 176160768
    },
    "past_key_values.4.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 201326592
    },
    "past_key_values.4.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 226492416
    },
    "present.4.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 201326592
    },
    "present.4.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 226492416
    },
    "past_key_values.5.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 251658240
    },
    "past_key_values.5.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 276824064
    },
    "present.5.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 251658240
    },
    "present.5.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 276824064
    },
    "past_key_values.6.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 301989888
    },
    "past_key_values.6.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 327155712
    },
    "present.6.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 301989888
    },
    "present.6.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 327155712
    },
    "past_key_values.7.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 352321536
    },
    "past_key_values.7.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 377487360
    },
    "present.7.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 352321536
    },
    "present.7.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 377487360
    },
    "past_key_values.8.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 402653184
    },
    "past_key_values.8.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 427819008
    },
    "present.8.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 402653184
    },
    "present.8.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 427819008
    },
    "past_key_values.9.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 452984832
    },
    "past_key_values.9.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 478150656
    },
    "present.9.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 452984832
    },
    "present.9.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 478150656
    },
    "past_key_values.10.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 503316480
    },
    "past_key_values.10.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 528482304
    },
    "present.10.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 503316480
    },
    "present.10.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 528482304
    },
    "past_key_values.11.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 553648128
    },
    "past_key_values.11.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 578813952
    },
    "present.11.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 553648128
    },
    "present.11.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 578813952
    },
    "past_key_values.12.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 603979776
    },
    "past_key_values.12.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 629145600
    },
    "present.12.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 603979776
    },
    "present.12.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 629145600
    },
    "past_key_values.13.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 654311424
    },
    "past_key_values.13.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 679477248
    },
    "present.13.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 654311424
    },
    "present.13.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 679477248
    },
    "past_key_values.14.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 704643072
    },
    "past_key_values.14.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 729808896
    },
    "present.14.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 704643072
    },
    "present.14.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 729808896
    },
    "past_key_values.15.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 754974720
    },
    "past_key_values.15.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 780140544
    },
    "present.15.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 754974720
    },
    "present.15.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 780140544
    },
    "past_key_values.16.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 805306368
    },
    "past_key_values.16.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 830472192
    },
    "present.16.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 805306368
    },
    "present.16.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 830472192
    },
    "past_key_values.17.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 855638016
    },
    "past_key_values.17.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 880803840
    },
    "present.17.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 855638016
    },
    "present.17.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 880803840
    },
    "past_key_values.18.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 905969664
    },
    "past_key_values.18.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 931135488
    },
    "present.18.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 905969664
    },
    "present.18.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 931135488
    },
    "past_key_values.19.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 956301312
    },
    "past_key_values.19.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 981467136
    },
    "present.19.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 956301312
    },
    "present.19.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 981467136
    },
    "past_key_values.20.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1006632960
    },
    "past_key_values.20.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1031798784
    },
    "present.20.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1006632960
    },
    "present.20.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1031798784
    },
    "past_key_values.21.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1056964608
    },
    "past_key_values.21.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1082130432
    },
    "present.21.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1056964608
    },
    "present.21.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1082130432
    },
    "past_key_values.22.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1107296256
    },
    "past_key_values.22.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1132462080
    },
    "present.22.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1107296256
    },
    "present.22.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1132462080
    },
    "past_key_values.23.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1157627904
    },
    "past_key_values.23.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1182793728
    },
    "present.23.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1157627904
    },
    "present.23.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1182793728
    },
    "past_key_values.24.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1207959552
    },
    "past_key_values.24.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1233125376
    },
    "present.24.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1207959552
    },
    "present.24.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1233125376
    },
    "past_key_values.25.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1258291200
    },
    "past_key_values.25.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1283457024
    },
    "present.25.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1258291200
    },
    "present.25.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1283457024
    },
    "past_key_values.26.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1308622848
    },
    "past_key_values.26.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1333788672
    },
    "present.26.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1308622848
    },
    "present.26.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1333788672
    },
    "past_key_values.27.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1358954496
    },
    "past_key_values.27.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1384120320
    },
    "present.27.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1358954496
    },
    "present.27.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1384120320
    },
    "past_key_values.28.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1409286144
    },
    "past_key_values.28.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1434451968
    },
    "present.28.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1409286144
    },
    "present.28.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1434451968
    },
    "past_key_values.29.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1459617792
    },
    "past_key_values.29.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1484783616
    },
    "present.29.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1459617792
    },
    "present.29.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1484783616
    },
    "past_key_values.30.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1509949440
    },
    "past_key_values.30.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1535115264
    },
    "present.30.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1509949440
    },
    "present.30.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1535115264
    },
    "past_key_values.31.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1560281088
    },
    "past_key_values.31.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1585446912
    },
    "present.31.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1560281088
    },
    "present.31.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        32,
        4096,
        96
      ],
      "size_in_bytes": 25165824,
      "op_tensor_size": 25165824,
      "offset": 1585446912
    },
    "sin_cos_cache_token": {
      "packed_buffer_label": "ext_buf_1",
      "xrt_arg_id": 6,
      "dtype": "bfloat16",
      "shape": [
        135168,
        96
      ],
      "size_in_bytes": 25952256,
      "op_tensor_size": 25952256,
      "offset": 0
    }
  },
  "aux_info": {
    "is_llm": true
  }
}