diff --git "a/residual_lm_mf_f16.mlmodelc/model.mil" "b/residual_lm_mf_f16.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/residual_lm_mf_f16.mlmodelc/model.mil" @@ -0,0 +1,6916 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3405.2.1"}})] +{ + func length_1(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { + tensor var_80 = const()[name = string("op_80"), val = tensor([[0]])]; + tensor position_ids_1 = add(x = var_80, y = position_id)[name = string("position_ids_1")]; + int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; + bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; + tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; + tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; + int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; + bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; + int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; + tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; + tensor var_105 = const()[name = string("op_105"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; + tensor var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor([2])]; + tensor var_112 = expand_dims(axes = var_112_axes_0, x = position_ids_1)[name = string("op_112")]; + tensor var_113 = greater(x = var_105, y = var_112)[name = string("op_113")]; + tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; + string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_113_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_113)[name = string("cast_65")]; + tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_113_to_fp16)[name = string("attention_mask_3_cast_fp16")]; + fp16 var_121_promoted_to_fp16 = const()[name = string("op_121_promoted_to_fp16"), val = fp16(0x0p+0)]; + tensor var_122_cast_fp16 = equal(x = 
attention_mask_3_cast_fp16, y = var_121_promoted_to_fp16)[name = string("op_122_cast_fp16")]; + tensor var_123_after_broadcast_to_fp16 = const()[name = string("op_123_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8397056)))]; + tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_123_after_broadcast_to_fp16, cond = var_122_cast_fp16)[name = string("attention_mask_cast_fp16")]; + tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; + tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; + tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; + int32 var_138 = const()[name = string("op_138"), val = int32(1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_141_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_141_cast_fp16")]; + bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; + tensor x_1_cast_fp16 = concat(axis = var_138, interleave = x_1_interleave_0, values = (inputs_embeds, var_141_cast_fp16))[name = string("x_1_cast_fp16")]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_151_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; + tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; + tensor var_157_split_sizes_0 = const()[name = string("op_157_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_157_axis_0 = const()[name = string("op_157_axis_0"), val = int32(1)]; + tensor var_157_cast_fp16_0, tensor var_157_cast_fp16_1 = split(axis = var_157_axis_0, split_sizes = var_157_split_sizes_0, x = out_3_cast_fp16)[name = string("op_157_cast_fp16")]; + tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([1])]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; + tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_179_to_fp16, x = var_157_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = 
string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; + tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_190_to_fp16, x = var_157_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor var_201_to_fp16 = const()[name = string("op_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; + tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_201_to_fp16, x = var_157_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 16, 64, 1])]; + tensor embed_1_cast_fp16 = reshape(shape = var_209, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 2, 64, 1])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_states_1_cast_fp16)[name = string("op_214_cast_fp16")]; + tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_220 = const()[name = string("op_220"), val = tensor([1, 2, 64, 1])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = value_states_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_20")]; + tensor var_225_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226_split_sizes_0 = const()[name = string("op_226_split_sizes_0"), val = tensor([32, 32])]; + int32 var_226_axis_0 = const()[name = string("op_226_axis_0"), val = int32(-2)]; + tensor var_226_cast_fp16_0, tensor var_226_cast_fp16_1 = split(axis = var_226_axis_0, split_sizes = var_226_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_226_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_228_cast_fp16 = mul(x = var_226_cast_fp16_1, y = const_7_promoted_to_fp16)[name = 
string("op_228_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-2)]; + bool var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_230, interleave = var_231_interleave_0, values = (var_228_cast_fp16, var_226_cast_fp16_0))[name = string("op_231_cast_fp16")]; + tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_19")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_225_cast_fp16, y = var_232_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_214_cast_fp16)[name = string("transpose_17")]; + tensor var_235_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor var_236_split_sizes_0 = const()[name = string("op_236_split_sizes_0"), val = tensor([32, 32])]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(-1)]; + tensor var_236_cast_fp16_0, tensor var_236_cast_fp16_1 = split(axis = var_236_axis_0, split_sizes = var_236_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_236_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_238_cast_fp16 = mul(x = var_236_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_238_cast_fp16")]; + int32 var_240 = const()[name = string("op_240"), val = int32(-1)]; + bool var_241_interleave_0 = const()[name = string("op_241_interleave_0"), val = bool(false)]; + tensor var_241_cast_fp16 = concat(axis = var_240, interleave = var_241_interleave_0, values = (var_238_cast_fp16, var_236_cast_fp16_0))[name = string("op_241_cast_fp16")]; + tensor var_242_cast_fp16 = mul(x = var_241_cast_fp16, y = sin_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_235_cast_fp16, y = var_242_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_246 = add(x = position_id, y = q_len_1)[name = string("op_246")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; + tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; + tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_246, 
concat_4_values3_0))[name = string("concat_4")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache)[name = string("coreml_update_state_12")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_221_cast_fp16)[name = string("transpose_16")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache)[name = string("coreml_update_state_13")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_0 = const()[name = string("op_285_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = coreml_update_state_12)[name = 
string("op_285_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; + int32 var_288_axis_0 = const()[name = string("op_288_axis_0"), val = int32(1)]; + tensor var_288_cast_fp16_0, tensor var_288_cast_fp16_1 = split(axis = var_288_axis_0, split_sizes = tile_0, x = var_285_cast_fp16)[name = string("op_288_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = coreml_update_state_13)[name = string("op_295_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; + int32 var_298_axis_0 = const()[name = string("op_298_axis_0"), val = int32(1)]; + tensor var_298_cast_fp16_0, tensor var_298_cast_fp16_1 = split(axis = var_298_axis_0, split_sizes = tile_1, x = var_295_cast_fp16)[name = string("op_298_cast_fp16")]; + tensor var_301_split_sizes_0 = const()[name = string("op_301_split_sizes_0"), val = tensor([8, 8])]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + tensor var_301_cast_fp16_0, tensor var_301_cast_fp16_1 = split(axis = var_301_axis_0, split_sizes = var_301_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_301_cast_fp16")]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_288_cast_fp16_0, y = var_301_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; + fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; + tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_18")]; + tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + int32 var_308 = const()[name = string("op_308"), val = int32(2)]; + tensor attn_weights_7_cast_fp16 = softmax(axis = var_308, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool var_314_transpose_x_1 = const()[name = string("op_314_transpose_x_1"), val = bool(true)]; + bool var_314_transpose_y_1 = const()[name = string("op_314_transpose_y_1"), val = bool(false)]; + tensor var_314_cast_fp16 = matmul(transpose_x = var_314_transpose_x_1, transpose_y = var_314_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_298_cast_fp16_0)[name = string("op_314_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = 
var_288_cast_fp16_1, y = var_301_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; + fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; + tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(2)]; + tensor attn_weights_15_cast_fp16 = softmax(axis = var_320, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; + bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_298_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; + int32 var_328 = const()[name = string("op_328"), val = int32(1)]; + bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; + tensor attn_output_3_cast_fp16 = concat(axis = var_328, interleave = attn_output_3_interleave_0, values = (var_314_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; + tensor var_332_perm_0 = const()[name = string("op_332_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 1024, 1, 1])]; + tensor var_332_cast_fp16 = transpose(perm = var_332_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_15")]; + tensor x_5_cast_fp16 = reshape(shape = var_337, x = var_332_cast_fp16)[name = string("x_5_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_344_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; + int32 var_356 = const()[name = string("op_356"), val = int32(1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_359_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_359_cast_fp16")]; + bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; + tensor x_9_cast_fp16 = 
concat(axis = var_356, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_359_cast_fp16))[name = string("x_9_cast_fp16")]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; + tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; + tensor var_375_split_sizes_0 = const()[name = string("op_375_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_375_axis_0 = const()[name = string("op_375_axis_0"), val = int32(1)]; + tensor var_375_cast_fp16_0, tensor var_375_cast_fp16_1 = split(axis = var_375_axis_0, split_sizes = var_375_split_sizes_0, x = out_9_cast_fp16)[name = string("op_375_cast_fp16")]; + string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; + tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; + tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; + int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; + tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_380_to_fp16, x = var_375_cast_fp16_0)[name = string("input_1_cast_fp16")]; + tensor var_391_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_391_cast_fp16")]; + string var_396_pad_type_0 = const()[name = string("op_396_pad_type_0"), val = string("valid")]; + tensor var_396_strides_0 = const()[name = string("op_396_strides_0"), val = tensor([1, 1])]; + tensor var_396_pad_0 = const()[name = string("op_396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_396_dilations_0 = const()[name = string("op_396_dilations_0"), val = tensor([1, 1])]; + int32 var_396_groups_0 = const()[name = string("op_396_groups_0"), val = int32(1)]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; + tensor var_396_cast_fp16 = conv(dilations = var_396_dilations_0, groups = var_396_groups_0, pad = var_396_pad_0, pad_type = var_396_pad_type_0, strides = var_396_strides_0, weight = var_379_to_fp16, x = var_375_cast_fp16_0)[name = string("op_396_cast_fp16")]; + tensor x_15_cast_fp16 = mul(x = var_391_cast_fp16, y = var_396_cast_fp16)[name = string("x_15_cast_fp16")]; + string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; + tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 
0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; + tensor var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; + tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_378_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; + int32 var_409 = const()[name = string("op_409"), val = int32(1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_412_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_412_cast_fp16")]; + bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; + tensor x_19_cast_fp16 = concat(axis = var_409, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_412_cast_fp16))[name = string("x_19_cast_fp16")]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_422_to_fp16 = const()[name = string("op_422_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_422_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; + tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; + tensor var_428_split_sizes_0 = const()[name = string("op_428_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_428_axis_0 = const()[name = string("op_428_axis_0"), val = int32(1)]; + tensor var_428_cast_fp16_0, tensor var_428_cast_fp16_1 = split(axis = var_428_axis_0, split_sizes = var_428_split_sizes_0, x = out_15_cast_fp16)[name = string("op_428_cast_fp16")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; + tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_450_to_fp16, x = var_428_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; + string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val 
= string("valid")]; + tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; + tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; + int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; + tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_461_to_fp16, x = var_428_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; + string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; + tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; + tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; + int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; + tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_472_to_fp16, x = var_428_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 16, 64, 1])]; + tensor embed_5_cast_fp16 = reshape(shape = var_480, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 2, 64, 1])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = key_states_5_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_491 = const()[name = string("op_491"), val = tensor([1, 2, 64, 1])]; + tensor var_492_cast_fp16 = reshape(shape = var_491, x = value_states_5_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_496_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor var_497_split_sizes_0 = const()[name = string("op_497_split_sizes_0"), val = tensor([32, 32])]; + int32 var_497_axis_0 = const()[name = string("op_497_axis_0"), val = int32(-2)]; + tensor var_497_cast_fp16_0, tensor var_497_cast_fp16_1 = split(axis = var_497_axis_0, split_sizes = var_497_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_497_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_499_cast_fp16")]; + int32 var_501 = const()[name = string("op_501"), val = int32(-2)]; + bool 
var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; + tensor var_502_cast_fp16 = concat(axis = var_501, interleave = var_502_interleave_0, values = (var_499_cast_fp16, var_497_cast_fp16_0))[name = string("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = sin_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_496_cast_fp16, y = var_503_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_485_cast_fp16)[name = string("transpose_14")]; + tensor var_506_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor([32, 32])]; + int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; + tensor var_507_cast_fp16_0, tensor var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_507_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_509_cast_fp16")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + bool var_512_interleave_0 = const()[name = string("op_512_interleave_0"), val = bool(false)]; + tensor var_512_cast_fp16 = concat(axis = var_511, interleave = var_512_interleave_0, values = (var_509_cast_fp16, var_507_cast_fp16_0))[name = string("op_512_cast_fp16")]; + tensor var_513_cast_fp16 = mul(x = var_512_cast_fp16, y = sin_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor key_states_7_cast_fp16 = add(x = var_506_cast_fp16, y = var_513_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_246, concat_12_values3_0))[name = string("concat_12")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, 
false, true])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_12)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache)[name = string("coreml_update_state_14")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_492_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_13)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache)[name = string("coreml_update_state_15")]; + tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_14)[name = string("op_556_cast_fp16")]; + tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; + int32 var_559_axis_0 = const()[name = string("op_559_axis_0"), val = int32(1)]; + tensor var_559_cast_fp16_0, tensor var_559_cast_fp16_1 = split(axis = var_559_axis_0, split_sizes = tile_2, x = var_556_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_566_begin_0 = const()[name = string("op_566_begin_0"), val = 
tensor([1, 0, 0, 0])]; + tensor var_566_end_0 = const()[name = string("op_566_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_566_end_mask_0 = const()[name = string("op_566_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = coreml_update_state_15)[name = string("op_566_cast_fp16")]; + tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; + int32 var_569_axis_0 = const()[name = string("op_569_axis_0"), val = int32(1)]; + tensor var_569_cast_fp16_0, tensor var_569_cast_fp16_1 = split(axis = var_569_axis_0, split_sizes = tile_3, x = var_566_cast_fp16)[name = string("op_569_cast_fp16")]; + tensor var_572_split_sizes_0 = const()[name = string("op_572_split_sizes_0"), val = tensor([8, 8])]; + int32 var_572_axis_0 = const()[name = string("op_572_axis_0"), val = int32(1)]; + tensor var_572_cast_fp16_0, tensor var_572_cast_fp16_1 = split(axis = var_572_axis_0, split_sizes = var_572_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_572_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_559_cast_fp16_0, y = var_572_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; + fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + int32 var_579 = const()[name = string("op_579"), val = int32(2)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_579, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool var_585_transpose_x_1 = const()[name = string("op_585_transpose_x_1"), val = bool(true)]; + bool var_585_transpose_y_1 = const()[name = string("op_585_transpose_y_1"), val = bool(false)]; + tensor var_585_cast_fp16 = matmul(transpose_x = var_585_transpose_x_1, transpose_y = var_585_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_569_cast_fp16_0)[name = string("op_585_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_559_cast_fp16_1, y = var_572_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; + fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; + tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = 
string("attn_weights_29_cast_fp16")]; + int32 var_591 = const()[name = string("op_591"), val = int32(2)]; + tensor attn_weights_31_cast_fp16 = softmax(axis = var_591, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; + bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_569_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; + int32 var_599 = const()[name = string("op_599"), val = int32(1)]; + bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; + tensor attn_output_9_cast_fp16 = concat(axis = var_599, interleave = attn_output_9_interleave_0, values = (var_585_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; + tensor var_603_perm_0 = const()[name = string("op_603_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_608 = const()[name = string("op_608"), val = tensor([1, 1024, 1, 1])]; + tensor var_603_cast_fp16 = transpose(perm = var_603_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_12")]; + tensor x_23_cast_fp16 = reshape(shape = var_608, x = var_603_cast_fp16)[name = string("x_23_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_615_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_627 = const()[name = string("op_627"), val = int32(1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_630_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_630_cast_fp16")]; + bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; + tensor x_27_cast_fp16 = concat(axis = var_627, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_630_cast_fp16))[name = string("x_27_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_640_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor 
layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; + tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; + tensor var_646_split_sizes_0 = const()[name = string("op_646_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_646_axis_0 = const()[name = string("op_646_axis_0"), val = int32(1)]; + tensor var_646_cast_fp16_0, tensor var_646_cast_fp16_1 = split(axis = var_646_axis_0, split_sizes = var_646_split_sizes_0, x = out_21_cast_fp16)[name = string("op_646_cast_fp16")]; + string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; + tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; + tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; + int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; + tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_651_to_fp16, x = var_646_cast_fp16_0)[name = string("input_3_cast_fp16")]; + tensor var_662_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_662_cast_fp16")]; + string var_667_pad_type_0 = const()[name = string("op_667_pad_type_0"), val = string("valid")]; + tensor var_667_strides_0 = const()[name = string("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = string("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = string("op_667_dilations_0"), val = tensor([1, 1])]; + int32 var_667_groups_0 = const()[name = string("op_667_groups_0"), val = int32(1)]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = var_650_to_fp16, x = var_646_cast_fp16_0)[name = string("op_667_cast_fp16")]; + tensor x_33_cast_fp16 = mul(x = var_662_cast_fp16, y = var_667_cast_fp16)[name = string("x_33_cast_fp16")]; + string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; + tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; + tensor hidden_states_11_cast_fp16 = 
conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_649_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; + int32 var_680 = const()[name = string("op_680"), val = int32(1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_683_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_683_cast_fp16")]; + bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; + tensor x_37_cast_fp16 = concat(axis = var_680, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_683_cast_fp16))[name = string("x_37_cast_fp16")]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_693_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; + tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; + tensor var_699_split_sizes_0 = const()[name = string("op_699_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_699_axis_0 = const()[name = string("op_699_axis_0"), val = int32(1)]; + tensor var_699_cast_fp16_0, tensor var_699_cast_fp16_1 = split(axis = var_699_axis_0, split_sizes = var_699_split_sizes_0, x = out_27_cast_fp16)[name = string("op_699_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; + tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_721_to_fp16, x = var_699_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; + string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; + tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; + tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; + int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = 
int32(1)]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; + tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_732_to_fp16, x = var_699_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; + tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_743_to_fp16, x = var_699_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([1, 16, 64, 1])]; + tensor embed_9_cast_fp16 = reshape(shape = var_751, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([1, 2, 64, 1])]; + tensor var_756_cast_fp16 = reshape(shape = var_755, x = key_states_9_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, 2, 64, 1])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = value_states_9_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_767_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_767_cast_fp16")]; + tensor var_768_split_sizes_0 = const()[name = string("op_768_split_sizes_0"), val = tensor([32, 32])]; + int32 var_768_axis_0 = const()[name = string("op_768_axis_0"), val = int32(-2)]; + tensor var_768_cast_fp16_0, tensor var_768_cast_fp16_1 = split(axis = var_768_axis_0, split_sizes = var_768_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_768_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_770_cast_fp16 = mul(x = var_768_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_770_cast_fp16")]; + int32 var_772 = const()[name = string("op_772"), val = int32(-2)]; + bool var_773_interleave_0 = const()[name = string("op_773_interleave_0"), val = bool(false)]; + tensor var_773_cast_fp16 = concat(axis = var_772, interleave = var_773_interleave_0, values = (var_770_cast_fp16, var_768_cast_fp16_0))[name = string("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = mul(x = var_773_cast_fp16, y = sin_1_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = 
var_767_cast_fp16, y = var_774_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_756_cast_fp16)[name = string("transpose_11")]; + tensor var_777_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_778_split_sizes_0 = const()[name = string("op_778_split_sizes_0"), val = tensor([32, 32])]; + int32 var_778_axis_0 = const()[name = string("op_778_axis_0"), val = int32(-1)]; + tensor var_778_cast_fp16_0, tensor var_778_cast_fp16_1 = split(axis = var_778_axis_0, split_sizes = var_778_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_778_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_780_cast_fp16 = mul(x = var_778_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_780_cast_fp16")]; + int32 var_782 = const()[name = string("op_782"), val = int32(-1)]; + bool var_783_interleave_0 = const()[name = string("op_783_interleave_0"), val = bool(false)]; + tensor var_783_cast_fp16 = concat(axis = var_782, interleave = var_783_interleave_0, values = (var_780_cast_fp16, var_778_cast_fp16_0))[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_783_cast_fp16, y = sin_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor key_states_11_cast_fp16 = add(x = var_777_cast_fp16, y = var_784_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; + tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; + tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_246, concat_20_values3_0))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor 
key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_14)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache)[name = string("coreml_update_state_16")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_763_cast_fp16)[name = string("transpose_10")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_15)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache)[name = string("coreml_update_state_17")]; + tensor var_827_begin_0 = const()[name = string("op_827_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_827_end_0 = const()[name = string("op_827_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_827_end_mask_0 = const()[name = string("op_827_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_827_cast_fp16 = slice_by_index(begin = var_827_begin_0, end = var_827_end_0, end_mask = var_827_end_mask_0, x = coreml_update_state_16)[name = string("op_827_cast_fp16")]; + tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; + int32 var_830_axis_0 = const()[name = string("op_830_axis_0"), val = int32(1)]; + tensor var_830_cast_fp16_0, tensor var_830_cast_fp16_1 = split(axis = var_830_axis_0, split_sizes = tile_4, x = var_827_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = 
coreml_update_state_17)[name = string("op_837_cast_fp16")]; + tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; + int32 var_840_axis_0 = const()[name = string("op_840_axis_0"), val = int32(1)]; + tensor var_840_cast_fp16_0, tensor var_840_cast_fp16_1 = split(axis = var_840_axis_0, split_sizes = tile_5, x = var_837_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_843_split_sizes_0 = const()[name = string("op_843_split_sizes_0"), val = tensor([8, 8])]; + int32 var_843_axis_0 = const()[name = string("op_843_axis_0"), val = int32(1)]; + tensor var_843_cast_fp16_0, tensor var_843_cast_fp16_1 = split(axis = var_843_axis_0, split_sizes = var_843_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_843_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_830_cast_fp16_0, y = var_843_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; + fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; + tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(2)]; + tensor attn_weights_39_cast_fp16 = softmax(axis = var_850, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; + bool var_856_transpose_x_1 = const()[name = string("op_856_transpose_x_1"), val = bool(true)]; + bool var_856_transpose_y_1 = const()[name = string("op_856_transpose_y_1"), val = bool(false)]; + tensor var_856_cast_fp16 = matmul(transpose_x = var_856_transpose_x_1, transpose_y = var_856_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_840_cast_fp16_0)[name = string("op_856_cast_fp16")]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_830_cast_fp16_1, y = var_843_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; + fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + int32 var_862 = const()[name = string("op_862"), val = int32(2)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_862, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; + bool 
attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_840_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; + int32 var_870 = const()[name = string("op_870"), val = int32(1)]; + bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; + tensor attn_output_15_cast_fp16 = concat(axis = var_870, interleave = attn_output_15_interleave_0, values = (var_856_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; + tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 1024, 1, 1])]; + tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_9")]; + tensor x_41_cast_fp16 = reshape(shape = var_879, x = var_874_cast_fp16)[name = string("x_41_cast_fp16")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; + tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_886_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_898 = const()[name = string("op_898"), val = int32(1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_901_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_901_cast_fp16")]; + bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; + tensor x_45_cast_fp16 = concat(axis = var_898, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_901_cast_fp16))[name = string("x_45_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_911_to_fp16 = const()[name = string("op_911_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_911_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; + tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = 
string("out_33_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_917_axis_0 = const()[name = string("op_917_axis_0"), val = int32(1)]; + tensor var_917_cast_fp16_0, tensor var_917_cast_fp16_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = out_33_cast_fp16)[name = string("op_917_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_922_to_fp16, x = var_917_cast_fp16_0)[name = string("input_5_cast_fp16")]; + tensor var_933_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_933_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = var_921_to_fp16, x = var_917_cast_fp16_0)[name = string("op_938_cast_fp16")]; + tensor x_51_cast_fp16 = mul(x = var_933_cast_fp16, y = var_938_cast_fp16)[name = string("x_51_cast_fp16")]; + string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; + tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; + tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; + tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_920_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = 
hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_951 = const()[name = string("op_951"), val = int32(1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_954_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_954_cast_fp16")]; + bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; + tensor x_55_cast_fp16 = concat(axis = var_951, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_954_cast_fp16))[name = string("x_55_cast_fp16")]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_964_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; + tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; + tensor var_970_split_sizes_0 = const()[name = string("op_970_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_970_axis_0 = const()[name = string("op_970_axis_0"), val = int32(1)]; + tensor var_970_cast_fp16_0, tensor var_970_cast_fp16_1 = split(axis = var_970_axis_0, split_sizes = var_970_split_sizes_0, x = out_39_cast_fp16)[name = string("op_970_cast_fp16")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; + tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_992_to_fp16, x = var_970_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; + tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = 
key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1003_to_fp16, x = var_970_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor var_1014_to_fp16 = const()[name = string("op_1014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; + tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1014_to_fp16, x = var_970_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, 16, 64, 1])]; + tensor embed_13_cast_fp16 = reshape(shape = var_1022, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 2, 64, 1])]; + tensor var_1027_cast_fp16 = reshape(shape = var_1026, x = key_states_13_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 2, 64, 1])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = value_states_13_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1038_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_split_sizes_0 = const()[name = string("op_1039_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1039_axis_0 = const()[name = string("op_1039_axis_0"), val = int32(-2)]; + tensor var_1039_cast_fp16_0, tensor var_1039_cast_fp16_1 = split(axis = var_1039_axis_0, split_sizes = var_1039_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1039_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1041_cast_fp16 = mul(x = var_1039_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1041_cast_fp16")]; + int32 var_1043 = const()[name = string("op_1043"), val = int32(-2)]; + bool var_1044_interleave_0 = const()[name = string("op_1044_interleave_0"), val = bool(false)]; + tensor var_1044_cast_fp16 = concat(axis = var_1043, interleave = var_1044_interleave_0, values = (var_1041_cast_fp16, var_1039_cast_fp16_0))[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = mul(x = var_1044_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1045_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1027_cast_fp16)[name = string("transpose_8")]; + tensor var_1048_cast_fp16 = 
mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1049_split_sizes_0 = const()[name = string("op_1049_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1049_axis_0 = const()[name = string("op_1049_axis_0"), val = int32(-1)]; + tensor var_1049_cast_fp16_0, tensor var_1049_cast_fp16_1 = split(axis = var_1049_axis_0, split_sizes = var_1049_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1049_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1051_cast_fp16 = mul(x = var_1049_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1051_cast_fp16")]; + int32 var_1053 = const()[name = string("op_1053"), val = int32(-1)]; + bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; + tensor var_1054_cast_fp16 = concat(axis = var_1053, interleave = var_1054_interleave_0, values = (var_1051_cast_fp16, var_1049_cast_fp16_0))[name = string("op_1054_cast_fp16")]; + tensor var_1055_cast_fp16 = mul(x = var_1054_cast_fp16, y = sin_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor key_states_15_cast_fp16 = add(x = var_1048_cast_fp16, y = var_1055_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; + tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; + tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_246, concat_28_values3_0))[name = string("concat_28")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = 
key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_16)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache)[name = string("coreml_update_state_18")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1034_cast_fp16)[name = string("transpose_7")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_17)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache)[name = string("coreml_update_state_19")]; + tensor var_1098_begin_0 = const()[name = string("op_1098_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = string("op_1098_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1098_end_mask_0 = const()[name = string("op_1098_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = coreml_update_state_18)[name = string("op_1098_cast_fp16")]; + tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(1)]; + tensor var_1101_cast_fp16_0, tensor var_1101_cast_fp16_1 = split(axis = var_1101_axis_0, split_sizes = tile_6, x = var_1098_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = coreml_update_state_19)[name = string("op_1108_cast_fp16")]; + tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(1)]; + 
tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = tile_7, x = var_1108_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor var_1114_split_sizes_0 = const()[name = string("op_1114_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1114_axis_0 = const()[name = string("op_1114_axis_0"), val = int32(1)]; + tensor var_1114_cast_fp16_0, tensor var_1114_cast_fp16_1 = split(axis = var_1114_axis_0, split_sizes = var_1114_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1101_cast_fp16_0, y = var_1114_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; + fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; + tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(2)]; + tensor attn_weights_55_cast_fp16 = softmax(axis = var_1121, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; + bool var_1127_transpose_x_1 = const()[name = string("op_1127_transpose_x_1"), val = bool(true)]; + bool var_1127_transpose_y_1 = const()[name = string("op_1127_transpose_y_1"), val = bool(false)]; + tensor var_1127_cast_fp16 = matmul(transpose_x = var_1127_transpose_x_1, transpose_y = var_1127_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1111_cast_fp16_0)[name = string("op_1127_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1101_cast_fp16_1, y = var_1114_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; + fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; + tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + int32 var_1133 = const()[name = string("op_1133"), val = int32(2)]; + tensor attn_weights_63_cast_fp16 = softmax(axis = var_1133, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; + bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; + bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y 
= attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1111_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; + int32 var_1141 = const()[name = string("op_1141"), val = int32(1)]; + bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = concat(axis = var_1141, interleave = attn_output_21_interleave_0, values = (var_1127_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 1024, 1, 1])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_6")]; + tensor x_59_cast_fp16 = reshape(shape = var_1150, x = var_1145_cast_fp16)[name = string("x_59_cast_fp16")]; + string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; + tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; + tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1157_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; + int32 var_1169 = const()[name = string("op_1169"), val = int32(1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1172_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1172_cast_fp16")]; + bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; + tensor x_63_cast_fp16 = concat(axis = var_1169, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1172_cast_fp16))[name = string("x_63_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_1182_to_fp16 = const()[name = string("op_1182_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1182_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; + tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; + tensor var_1188_split_sizes_0 = const()[name = string("op_1188_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1188_axis_0 = const()[name = 
string("op_1188_axis_0"), val = int32(1)]; + tensor var_1188_cast_fp16_0, tensor var_1188_cast_fp16_1 = split(axis = var_1188_axis_0, split_sizes = var_1188_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1188_cast_fp16")]; + string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; + tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; + int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; + tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1193_to_fp16, x = var_1188_cast_fp16_0)[name = string("input_7_cast_fp16")]; + tensor var_1204_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1204_cast_fp16")]; + string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; + tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; + tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; + int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; + tensor var_1209_cast_fp16 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = var_1192_to_fp16, x = var_1188_cast_fp16_0)[name = string("op_1209_cast_fp16")]; + tensor x_69_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1209_cast_fp16)[name = string("x_69_cast_fp16")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; + tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1191_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; + int32 var_1222 = const()[name = string("op_1222"), val = int32(1)]; + fp16 const_44_promoted_to_fp16 
= const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1225_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1225_cast_fp16")]; + bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; + tensor x_73_cast_fp16 = concat(axis = var_1222, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1225_cast_fp16))[name = string("x_73_cast_fp16")]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_1235_to_fp16 = const()[name = string("op_1235_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1235_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; + tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; + tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(1)]; + tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1241_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; + tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1263_to_fp16, x = var_1241_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; + string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; + tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; + tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; + int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; + tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1274_to_fp16, x = var_1241_cast_fp16_0)[name 
= string("key_states_17_cast_fp16")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; + tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1285_to_fp16, x = var_1241_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 16, 64, 1])]; + tensor embed_17_cast_fp16 = reshape(shape = var_1293, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 2, 64, 1])]; + tensor var_1298_cast_fp16 = reshape(shape = var_1297, x = key_states_17_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 2, 64, 1])]; + tensor var_1305_cast_fp16 = reshape(shape = var_1304, x = value_states_17_cast_fp16)[name = string("op_1305_cast_fp16")]; + tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1309_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310_split_sizes_0 = const()[name = string("op_1310_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1310_axis_0 = const()[name = string("op_1310_axis_0"), val = int32(-2)]; + tensor var_1310_cast_fp16_0, tensor var_1310_cast_fp16_1 = split(axis = var_1310_axis_0, split_sizes = var_1310_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1310_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1312_cast_fp16 = mul(x = var_1310_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1312_cast_fp16")]; + int32 var_1314 = const()[name = string("op_1314"), val = int32(-2)]; + bool var_1315_interleave_0 = const()[name = string("op_1315_interleave_0"), val = bool(false)]; + tensor var_1315_cast_fp16 = concat(axis = var_1314, interleave = var_1315_interleave_0, values = (var_1312_cast_fp16, var_1310_cast_fp16_0))[name = string("op_1315_cast_fp16")]; + tensor var_1316_cast_fp16 = mul(x = var_1315_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_1309_cast_fp16, y = var_1316_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1298_cast_fp16)[name = string("transpose_5")]; + tensor var_1319_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor var_1320_split_sizes_0 = const()[name = 
string("op_1320_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1320_axis_0 = const()[name = string("op_1320_axis_0"), val = int32(-1)]; + tensor var_1320_cast_fp16_0, tensor var_1320_cast_fp16_1 = split(axis = var_1320_axis_0, split_sizes = var_1320_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1320_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1322_cast_fp16 = mul(x = var_1320_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 var_1324 = const()[name = string("op_1324"), val = int32(-1)]; + bool var_1325_interleave_0 = const()[name = string("op_1325_interleave_0"), val = bool(false)]; + tensor var_1325_cast_fp16 = concat(axis = var_1324, interleave = var_1325_interleave_0, values = (var_1322_cast_fp16, var_1320_cast_fp16_0))[name = string("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = mul(x = var_1325_cast_fp16, y = sin_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor key_states_19_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1326_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; + tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_246, concat_36_values3_0))[name = string("concat_36")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = 
coreml_update_state_18)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache)[name = string("coreml_update_state_20")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1305_cast_fp16)[name = string("transpose_4")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_19)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache)[name = string("coreml_update_state_21")]; + tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1369_end_0 = const()[name = string("op_1369_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = coreml_update_state_20)[name = string("op_1369_cast_fp16")]; + tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; + int32 var_1372_axis_0 = const()[name = string("op_1372_axis_0"), val = int32(1)]; + tensor var_1372_cast_fp16_0, tensor var_1372_cast_fp16_1 = split(axis = var_1372_axis_0, split_sizes = tile_8, x = var_1369_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = string("op_1379_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = coreml_update_state_21)[name = string("op_1379_cast_fp16")]; + tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; + int32 var_1382_axis_0 = const()[name = string("op_1382_axis_0"), val = int32(1)]; + tensor var_1382_cast_fp16_0, tensor var_1382_cast_fp16_1 = split(axis = var_1382_axis_0, split_sizes = tile_9, x = var_1379_cast_fp16)[name = 
string("op_1382_cast_fp16")]; + tensor var_1385_split_sizes_0 = const()[name = string("op_1385_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1385_axis_0 = const()[name = string("op_1385_axis_0"), val = int32(1)]; + tensor var_1385_cast_fp16_0, tensor var_1385_cast_fp16_1 = split(axis = var_1385_axis_0, split_sizes = var_1385_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1385_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1372_cast_fp16_0, y = var_1385_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; + fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + int32 var_1392 = const()[name = string("op_1392"), val = int32(2)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_1392, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool var_1398_transpose_x_1 = const()[name = string("op_1398_transpose_x_1"), val = bool(true)]; + bool var_1398_transpose_y_1 = const()[name = string("op_1398_transpose_y_1"), val = bool(false)]; + tensor var_1398_cast_fp16 = matmul(transpose_x = var_1398_transpose_x_1, transpose_y = var_1398_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1382_cast_fp16_0)[name = string("op_1398_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1372_cast_fp16_1, y = var_1385_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; + fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; + tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + int32 var_1404 = const()[name = string("op_1404"), val = int32(2)]; + tensor attn_weights_79_cast_fp16 = softmax(axis = var_1404, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; + bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; + bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1382_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; + int32 
var_1412 = const()[name = string("op_1412"), val = int32(1)]; + bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; + tensor attn_output_27_cast_fp16 = concat(axis = var_1412, interleave = attn_output_27_interleave_0, values = (var_1398_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; + tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1024, 1, 1])]; + tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_3")]; + tensor x_77_cast_fp16 = reshape(shape = var_1421, x = var_1416_cast_fp16)[name = string("x_77_cast_fp16")]; + string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; + tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; + tensor var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; + tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1428_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(1)]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1443_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1443_cast_fp16")]; + bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; + tensor x_81_cast_fp16 = concat(axis = var_1440, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1443_cast_fp16))[name = string("x_81_cast_fp16")]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1453_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; + tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; + tensor var_1459_split_sizes_0 = const()[name = string("op_1459_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1459_axis_0 = const()[name = string("op_1459_axis_0"), val = int32(1)]; + tensor var_1459_cast_fp16_0, tensor var_1459_cast_fp16_1 = split(axis = var_1459_axis_0, split_sizes = 
var_1459_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1459_cast_fp16")]; + string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; + tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; + tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; + int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; + tensor var_1464_to_fp16 = const()[name = string("op_1464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; + tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1464_to_fp16, x = var_1459_cast_fp16_0)[name = string("input_9_cast_fp16")]; + tensor var_1475_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1475_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = var_1463_to_fp16, x = var_1459_cast_fp16_0)[name = string("op_1480_cast_fp16")]; + tensor x_87_cast_fp16 = mul(x = var_1475_cast_fp16, y = var_1480_cast_fp16)[name = string("x_87_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor var_1462_to_fp16 = const()[name = string("op_1462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1462_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_1493 = const()[name = string("op_1493"), val = int32(1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1496_cast_fp16 = mul(x = x_89_cast_fp16, y = 
const_54_promoted_to_fp16)[name = string("op_1496_cast_fp16")]; + bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; + tensor x_91_cast_fp16 = concat(axis = var_1493, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1496_cast_fp16))[name = string("x_91_cast_fp16")]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1506_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; + tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; + tensor var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(1)]; + tensor var_1512_cast_fp16_0, tensor var_1512_cast_fp16_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1512_cast_fp16")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor var_1534_to_fp16 = const()[name = string("op_1534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; + tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1534_to_fp16, x = var_1512_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; + tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1545_to_fp16, x = var_1512_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; + string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val 
= string("valid")]; + tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; + tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; + int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; + tensor var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; + tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1556_to_fp16, x = var_1512_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 16, 64, 1])]; + tensor embed_21_cast_fp16 = reshape(shape = var_1564, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; + tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 2, 64, 1])]; + tensor var_1569_cast_fp16 = reshape(shape = var_1568, x = key_states_21_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 2, 64, 1])]; + tensor var_1576_cast_fp16 = reshape(shape = var_1575, x = value_states_21_cast_fp16)[name = string("op_1576_cast_fp16")]; + tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1580_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1580_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-2)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1581_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1583_cast_fp16 = mul(x = var_1581_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1583_cast_fp16")]; + int32 var_1585 = const()[name = string("op_1585"), val = int32(-2)]; + bool var_1586_interleave_0 = const()[name = string("op_1586_interleave_0"), val = bool(false)]; + tensor var_1586_cast_fp16 = concat(axis = var_1585, interleave = var_1586_interleave_0, values = (var_1583_cast_fp16, var_1581_cast_fp16_0))[name = string("op_1586_cast_fp16")]; + tensor var_1587_cast_fp16 = mul(x = var_1586_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_1580_cast_fp16, y = var_1587_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_1569_cast_fp16)[name = string("transpose_2")]; + tensor var_1590_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor var_1591_split_sizes_0 = const()[name = string("op_1591_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1591_axis_0 = const()[name = string("op_1591_axis_0"), val = int32(-1)]; + tensor var_1591_cast_fp16_0, 
tensor var_1591_cast_fp16_1 = split(axis = var_1591_axis_0, split_sizes = var_1591_split_sizes_0, x = embed_cast_fp16)[name = string("op_1591_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1593_cast_fp16 = mul(x = var_1591_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + int32 var_1595 = const()[name = string("op_1595"), val = int32(-1)]; + bool var_1596_interleave_0 = const()[name = string("op_1596_interleave_0"), val = bool(false)]; + tensor var_1596_cast_fp16 = concat(axis = var_1595, interleave = var_1596_interleave_0, values = (var_1593_cast_fp16, var_1591_cast_fp16_0))[name = string("op_1596_cast_fp16")]; + tensor var_1597_cast_fp16 = mul(x = var_1596_cast_fp16, y = sin_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor key_states_cast_fp16 = add(x = var_1590_cast_fp16, y = var_1597_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; + tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; + tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_246, concat_44_values3_0))[name = string("concat_44")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_cast_fp16, x = coreml_update_state_20)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = 
string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache)[name = string("coreml_update_state_22")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_1576_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_cast_fp16, x = coreml_update_state_21)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache)[name = string("coreml_update_state_23")]; + tensor var_1640_begin_0 = const()[name = string("op_1640_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1640_end_0 = const()[name = string("op_1640_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1640_end_mask_0 = const()[name = string("op_1640_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = coreml_update_state_22)[name = string("op_1640_cast_fp16")]; + tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; + int32 var_1643_axis_0 = const()[name = string("op_1643_axis_0"), val = int32(1)]; + tensor var_1643_cast_fp16_0, tensor var_1643_cast_fp16_1 = split(axis = var_1643_axis_0, split_sizes = tile_10, x = var_1640_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = string("op_1650_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = coreml_update_state_23)[name = string("op_1650_cast_fp16")]; + tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; + int32 var_1653_axis_0 = const()[name = string("op_1653_axis_0"), val = int32(1)]; + tensor var_1653_cast_fp16_0, tensor var_1653_cast_fp16_1 = split(axis = var_1653_axis_0, split_sizes = tile_11, x = var_1650_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1656_split_sizes_0 = const()[name = string("op_1656_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1656_axis_0 = const()[name = 
string("op_1656_axis_0"), val = int32(1)]; + tensor var_1656_cast_fp16_0, tensor var_1656_cast_fp16_1 = split(axis = var_1656_axis_0, split_sizes = var_1656_split_sizes_0, x = query_states_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1643_cast_fp16_0, y = var_1656_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; + fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; + tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(2)]; + tensor attn_weights_87_cast_fp16 = softmax(axis = var_1663, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; + bool var_1669_transpose_x_1 = const()[name = string("op_1669_transpose_x_1"), val = bool(true)]; + bool var_1669_transpose_y_1 = const()[name = string("op_1669_transpose_y_1"), val = bool(false)]; + tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_1, transpose_y = var_1669_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1653_cast_fp16_0)[name = string("op_1669_cast_fp16")]; + bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; + bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; + tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1643_cast_fp16_1, y = var_1656_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; + fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; + int32 var_1675 = const()[name = string("op_1675"), val = int32(2)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_1675, x = attn_weights_93_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; + bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_cast_fp16, y = var_1653_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; + int32 var_1683 = const()[name = string("op_1683"), val = int32(1)]; + bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; + tensor 
attn_output_33_cast_fp16 = concat(axis = var_1683, interleave = attn_output_33_interleave_0, values = (var_1669_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; + tensor var_1687_perm_0 = const()[name = string("op_1687_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 1024, 1, 1])]; + tensor var_1687_cast_fp16 = transpose(perm = var_1687_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_0")]; + tensor x_95_cast_fp16 = reshape(shape = var_1692, x = var_1687_cast_fp16)[name = string("x_95_cast_fp16")]; + string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; + tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; + tensor var_1699_to_fp16 = const()[name = string("op_1699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; + tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1699_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; + int32 var_1711 = const()[name = string("op_1711"), val = int32(1)]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1714_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1714_cast_fp16")]; + bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; + tensor x_99_cast_fp16 = concat(axis = var_1711, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1714_cast_fp16))[name = string("x_99_cast_fp16")]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1724_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; + tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; + tensor var_1730_split_sizes_0 = const()[name = string("op_1730_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1730_axis_0 = const()[name = string("op_1730_axis_0"), val = int32(1)]; + tensor var_1730_cast_fp16_0, tensor var_1730_cast_fp16_1 = split(axis = var_1730_axis_0, split_sizes = var_1730_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1730_cast_fp16")]; + string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; + tensor 
input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; + tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; + int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; + tensor var_1735_to_fp16 = const()[name = string("op_1735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; + tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_1735_to_fp16, x = var_1730_cast_fp16_0)[name = string("input_cast_fp16")]; + tensor var_1746_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_1746_cast_fp16")]; + string var_1751_pad_type_0 = const()[name = string("op_1751_pad_type_0"), val = string("valid")]; + tensor var_1751_strides_0 = const()[name = string("op_1751_strides_0"), val = tensor([1, 1])]; + tensor var_1751_pad_0 = const()[name = string("op_1751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1751_dilations_0 = const()[name = string("op_1751_dilations_0"), val = tensor([1, 1])]; + int32 var_1751_groups_0 = const()[name = string("op_1751_groups_0"), val = int32(1)]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; + tensor var_1751_cast_fp16 = conv(dilations = var_1751_dilations_0, groups = var_1751_groups_0, pad = var_1751_pad_0, pad_type = var_1751_pad_type_0, strides = var_1751_strides_0, weight = var_1734_to_fp16, x = var_1730_cast_fp16_0)[name = string("op_1751_cast_fp16")]; + tensor x_105_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1751_cast_fp16)[name = string("x_105_cast_fp16")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; + tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_1733_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_107_cast_fp16")]; + int32 var_1764 = const()[name = string("op_1764"), val = int32(1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1767_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1767_cast_fp16")]; + bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; + tensor x_109_cast_fp16 = concat(axis = var_1764, interleave = x_109_interleave_0, values = (x_107_cast_fp16, 
var_1767_cast_fp16))[name = string("x_109_cast_fp16")]; + tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; + fp16 var_1777_to_fp16 = const()[name = string("op_1777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1777_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; + tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; + tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_75_cast_fp16")]; + tensor var_1783_split_sizes_0 = const()[name = string("op_1783_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(1)]; + tensor output, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = var_1783_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1783_cast_fp16")]; + } -> (output); + func length_128(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { + tensor var_80 = const()[name = string("op_80"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; + tensor position_ids_1 = add(x = var_80, y = position_id)[name = string("position_ids_1")]; + int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; + bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; + tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; + tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; + int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; + bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; + int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; + tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; + tensor var_105 = const()[name = string("op_105"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; + tensor var_112_axes_0 = const()[name = 
string("op_112_axes_0"), val = tensor([2])]; + tensor var_112 = expand_dims(axes = var_112_axes_0, x = position_ids_1)[name = string("op_112")]; + tensor var_113 = greater(x = var_105, y = var_112)[name = string("op_113")]; + tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; + string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_113_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_113)[name = string("cast_65")]; + tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_113_to_fp16)[name = string("attention_mask_3_cast_fp16")]; + fp16 var_121_promoted_to_fp16 = const()[name = string("op_121_promoted_to_fp16"), val = fp16(0x0p+0)]; + tensor var_122_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_121_promoted_to_fp16)[name = string("op_122_cast_fp16")]; + tensor var_123_after_broadcast_to_fp16 = const()[name = string("op_123_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187765056)))]; + tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_123_after_broadcast_to_fp16, cond = var_122_cast_fp16)[name = string("attention_mask_cast_fp16")]; + tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; + tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; + tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; + int32 var_138 = const()[name = string("op_138"), val = int32(1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_141_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_141_cast_fp16")]; + bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; + tensor x_1_cast_fp16 = concat(axis = var_138, interleave = x_1_interleave_0, values = (inputs_embeds, var_141_cast_fp16))[name = string("x_1_cast_fp16")]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_151_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; + tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; + tensor var_157_split_sizes_0 = const()[name = string("op_157_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_157_axis_0 = const()[name = string("op_157_axis_0"), val = int32(1)]; + tensor var_157_cast_fp16_0, tensor var_157_cast_fp16_1 = split(axis = var_157_axis_0, split_sizes = var_157_split_sizes_0, x = out_3_cast_fp16)[name = string("op_157_cast_fp16")]; + tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([128])]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = 
const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; + tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_179_to_fp16, x = var_157_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; + tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_190_to_fp16, x = var_157_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor var_201_to_fp16 = const()[name = string("op_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; + tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_201_to_fp16, x = var_157_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 16, 64, 128])]; + tensor embed_1_cast_fp16 = reshape(shape = var_209, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 2, 64, 128])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_states_1_cast_fp16)[name = string("op_214_cast_fp16")]; + tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_220 = const()[name = string("op_220"), val = tensor([1, 2, 64, 128])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = value_states_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor value_states_3_perm_0 = const()[name = 
string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_20")]; + tensor var_225_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226_split_sizes_0 = const()[name = string("op_226_split_sizes_0"), val = tensor([32, 32])]; + int32 var_226_axis_0 = const()[name = string("op_226_axis_0"), val = int32(-2)]; + tensor var_226_cast_fp16_0, tensor var_226_cast_fp16_1 = split(axis = var_226_axis_0, split_sizes = var_226_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_226_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_228_cast_fp16 = mul(x = var_226_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_228_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-2)]; + bool var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_230, interleave = var_231_interleave_0, values = (var_228_cast_fp16, var_226_cast_fp16_0))[name = string("op_231_cast_fp16")]; + tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_19")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_225_cast_fp16, y = var_232_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_214_cast_fp16)[name = string("transpose_17")]; + tensor var_235_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor var_236_split_sizes_0 = const()[name = string("op_236_split_sizes_0"), val = tensor([32, 32])]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(-1)]; + tensor var_236_cast_fp16_0, tensor var_236_cast_fp16_1 = split(axis = var_236_axis_0, split_sizes = var_236_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_236_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_238_cast_fp16 = mul(x = var_236_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_238_cast_fp16")]; + int32 var_240 = const()[name = string("op_240"), val = int32(-1)]; + bool var_241_interleave_0 = const()[name = string("op_241_interleave_0"), val = bool(false)]; + tensor var_241_cast_fp16 = concat(axis = var_240, interleave = var_241_interleave_0, values = (var_238_cast_fp16, var_236_cast_fp16_0))[name = string("op_241_cast_fp16")]; + tensor var_242_cast_fp16 = mul(x = var_241_cast_fp16, y = sin_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_235_cast_fp16, y = var_242_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_246 = add(x = position_id, y = q_len_1)[name = string("op_246")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = 
string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; + tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; + tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_246, concat_4_values3_0))[name = string("concat_4")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache)[name = string("coreml_update_state_0")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_221_cast_fp16)[name = string("transpose_16")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = 
value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache)[name = string("coreml_update_state_1")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_0 = const()[name = string("op_285_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = coreml_update_state_12)[name = string("op_285_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; + int32 var_288_axis_0 = const()[name = string("op_288_axis_0"), val = int32(1)]; + tensor var_288_cast_fp16_0, tensor var_288_cast_fp16_1 = split(axis = var_288_axis_0, split_sizes = tile_0, x = var_285_cast_fp16)[name = string("op_288_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = coreml_update_state_13)[name = string("op_295_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; + int32 var_298_axis_0 = const()[name = string("op_298_axis_0"), val = int32(1)]; + tensor var_298_cast_fp16_0, tensor var_298_cast_fp16_1 = split(axis = var_298_axis_0, split_sizes = tile_1, x = var_295_cast_fp16)[name = string("op_298_cast_fp16")]; + tensor var_301_split_sizes_0 = const()[name = string("op_301_split_sizes_0"), val = tensor([8, 8])]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + tensor var_301_cast_fp16_0, tensor var_301_cast_fp16_1 = split(axis = var_301_axis_0, split_sizes = var_301_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_301_cast_fp16")]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_288_cast_fp16_0, y = var_301_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; + fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; + tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_18")]; + tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + int32 var_308 = const()[name = string("op_308"), val = int32(2)]; + tensor 
attn_weights_7_cast_fp16 = softmax(axis = var_308, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool var_314_transpose_x_1 = const()[name = string("op_314_transpose_x_1"), val = bool(true)]; + bool var_314_transpose_y_1 = const()[name = string("op_314_transpose_y_1"), val = bool(false)]; + tensor var_314_cast_fp16 = matmul(transpose_x = var_314_transpose_x_1, transpose_y = var_314_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_298_cast_fp16_0)[name = string("op_314_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_288_cast_fp16_1, y = var_301_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; + fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; + tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(2)]; + tensor attn_weights_15_cast_fp16 = softmax(axis = var_320, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; + bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_298_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; + int32 var_328 = const()[name = string("op_328"), val = int32(1)]; + bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; + tensor attn_output_3_cast_fp16 = concat(axis = var_328, interleave = attn_output_3_interleave_0, values = (var_314_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; + tensor var_332_perm_0 = const()[name = string("op_332_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 1024, 1, 128])]; + tensor var_332_cast_fp16 = transpose(perm = var_332_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_15")]; + tensor x_5_cast_fp16 = reshape(shape = var_337, x = var_332_cast_fp16)[name = string("x_5_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_344_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; + int32 var_356 = const()[name = string("op_356"), val = int32(1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_359_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_359_cast_fp16")]; + bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; + tensor x_9_cast_fp16 = concat(axis = var_356, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_359_cast_fp16))[name = string("x_9_cast_fp16")]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; + tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; + tensor var_375_split_sizes_0 = const()[name = string("op_375_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_375_axis_0 = const()[name = string("op_375_axis_0"), val = int32(1)]; + tensor var_375_cast_fp16_0, tensor var_375_cast_fp16_1 = split(axis = var_375_axis_0, split_sizes = var_375_split_sizes_0, x = out_9_cast_fp16)[name = string("op_375_cast_fp16")]; + string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; + tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; + tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; + int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; + tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_380_to_fp16, x = var_375_cast_fp16_0)[name = string("input_1_cast_fp16")]; + tensor var_391_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_391_cast_fp16")]; + string var_396_pad_type_0 = const()[name = string("op_396_pad_type_0"), val = string("valid")]; + tensor var_396_strides_0 = const()[name = string("op_396_strides_0"), val = tensor([1, 1])]; + tensor var_396_pad_0 = const()[name = string("op_396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_396_dilations_0 = const()[name = string("op_396_dilations_0"), val = tensor([1, 1])]; + int32 var_396_groups_0 = const()[name = 
string("op_396_groups_0"), val = int32(1)]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; + tensor var_396_cast_fp16 = conv(dilations = var_396_dilations_0, groups = var_396_groups_0, pad = var_396_pad_0, pad_type = var_396_pad_type_0, strides = var_396_strides_0, weight = var_379_to_fp16, x = var_375_cast_fp16_0)[name = string("op_396_cast_fp16")]; + tensor x_15_cast_fp16 = mul(x = var_391_cast_fp16, y = var_396_cast_fp16)[name = string("x_15_cast_fp16")]; + string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; + tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; + tensor var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; + tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_378_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; + int32 var_409 = const()[name = string("op_409"), val = int32(1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_412_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_412_cast_fp16")]; + bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; + tensor x_19_cast_fp16 = concat(axis = var_409, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_412_cast_fp16))[name = string("x_19_cast_fp16")]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_422_to_fp16 = const()[name = string("op_422_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_422_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; + tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; + tensor var_428_split_sizes_0 = const()[name = string("op_428_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_428_axis_0 = const()[name = string("op_428_axis_0"), val = int32(1)]; + tensor var_428_cast_fp16_0, tensor var_428_cast_fp16_1 = split(axis = var_428_axis_0, split_sizes = var_428_split_sizes_0, x = out_15_cast_fp16)[name = string("op_428_cast_fp16")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val 
= tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; + tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_450_to_fp16, x = var_428_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; + string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; + tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; + tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; + int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; + tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_461_to_fp16, x = var_428_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; + string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; + tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; + tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; + int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; + tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_472_to_fp16, x = var_428_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 16, 64, 128])]; + tensor embed_5_cast_fp16 = reshape(shape = var_480, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 2, 64, 128])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = key_states_5_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_491 = const()[name = string("op_491"), val = tensor([1, 2, 64, 128])]; + tensor var_492_cast_fp16 = reshape(shape = var_491, x = value_states_5_cast_fp16)[name = string("op_492_cast_fp16")]; + 
tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_496_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor var_497_split_sizes_0 = const()[name = string("op_497_split_sizes_0"), val = tensor([32, 32])]; + int32 var_497_axis_0 = const()[name = string("op_497_axis_0"), val = int32(-2)]; + tensor var_497_cast_fp16_0, tensor var_497_cast_fp16_1 = split(axis = var_497_axis_0, split_sizes = var_497_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_497_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_499_cast_fp16")]; + int32 var_501 = const()[name = string("op_501"), val = int32(-2)]; + bool var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; + tensor var_502_cast_fp16 = concat(axis = var_501, interleave = var_502_interleave_0, values = (var_499_cast_fp16, var_497_cast_fp16_0))[name = string("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = sin_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_496_cast_fp16, y = var_503_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_485_cast_fp16)[name = string("transpose_14")]; + tensor var_506_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor([32, 32])]; + int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; + tensor var_507_cast_fp16_0, tensor var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_507_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_509_cast_fp16")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + bool var_512_interleave_0 = const()[name = string("op_512_interleave_0"), val = bool(false)]; + tensor var_512_cast_fp16 = concat(axis = var_511, interleave = var_512_interleave_0, values = (var_509_cast_fp16, var_507_cast_fp16_0))[name = string("op_512_cast_fp16")]; + tensor var_513_cast_fp16 = mul(x = var_512_cast_fp16, y = sin_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor key_states_7_cast_fp16 = add(x = var_506_cast_fp16, y = var_513_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = 
string("concat_11")]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_246, concat_12_values3_0))[name = string("concat_12")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_12)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache)[name = string("coreml_update_state_2")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_492_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_13)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache)[name = 
string("coreml_update_state_3")]; + tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_14)[name = string("op_556_cast_fp16")]; + tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; + int32 var_559_axis_0 = const()[name = string("op_559_axis_0"), val = int32(1)]; + tensor var_559_cast_fp16_0, tensor var_559_cast_fp16_1 = split(axis = var_559_axis_0, split_sizes = tile_2, x = var_556_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_566_begin_0 = const()[name = string("op_566_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_566_end_0 = const()[name = string("op_566_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_566_end_mask_0 = const()[name = string("op_566_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = coreml_update_state_15)[name = string("op_566_cast_fp16")]; + tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; + int32 var_569_axis_0 = const()[name = string("op_569_axis_0"), val = int32(1)]; + tensor var_569_cast_fp16_0, tensor var_569_cast_fp16_1 = split(axis = var_569_axis_0, split_sizes = tile_3, x = var_566_cast_fp16)[name = string("op_569_cast_fp16")]; + tensor var_572_split_sizes_0 = const()[name = string("op_572_split_sizes_0"), val = tensor([8, 8])]; + int32 var_572_axis_0 = const()[name = string("op_572_axis_0"), val = int32(1)]; + tensor var_572_cast_fp16_0, tensor var_572_cast_fp16_1 = split(axis = var_572_axis_0, split_sizes = var_572_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_572_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_559_cast_fp16_0, y = var_572_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; + fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + int32 var_579 = const()[name = string("op_579"), val = int32(2)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_579, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool var_585_transpose_x_1 = const()[name = string("op_585_transpose_x_1"), val = bool(true)]; + bool var_585_transpose_y_1 = const()[name = string("op_585_transpose_y_1"), val = bool(false)]; + tensor var_585_cast_fp16 = matmul(transpose_x = var_585_transpose_x_1, transpose_y = var_585_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_569_cast_fp16_0)[name = 
string("op_585_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_559_cast_fp16_1, y = var_572_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; + fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; + tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + int32 var_591 = const()[name = string("op_591"), val = int32(2)]; + tensor attn_weights_31_cast_fp16 = softmax(axis = var_591, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; + bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_569_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; + int32 var_599 = const()[name = string("op_599"), val = int32(1)]; + bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; + tensor attn_output_9_cast_fp16 = concat(axis = var_599, interleave = attn_output_9_interleave_0, values = (var_585_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; + tensor var_603_perm_0 = const()[name = string("op_603_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_608 = const()[name = string("op_608"), val = tensor([1, 1024, 1, 128])]; + tensor var_603_cast_fp16 = transpose(perm = var_603_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_12")]; + tensor x_23_cast_fp16 = reshape(shape = var_608, x = var_603_cast_fp16)[name = string("x_23_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_615_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 
var_627 = const()[name = string("op_627"), val = int32(1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_630_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_630_cast_fp16")]; + bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; + tensor x_27_cast_fp16 = concat(axis = var_627, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_630_cast_fp16))[name = string("x_27_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_640_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; + tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; + tensor var_646_split_sizes_0 = const()[name = string("op_646_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_646_axis_0 = const()[name = string("op_646_axis_0"), val = int32(1)]; + tensor var_646_cast_fp16_0, tensor var_646_cast_fp16_1 = split(axis = var_646_axis_0, split_sizes = var_646_split_sizes_0, x = out_21_cast_fp16)[name = string("op_646_cast_fp16")]; + string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; + tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; + tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; + int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; + tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_651_to_fp16, x = var_646_cast_fp16_0)[name = string("input_3_cast_fp16")]; + tensor var_662_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_662_cast_fp16")]; + string var_667_pad_type_0 = const()[name = string("op_667_pad_type_0"), val = string("valid")]; + tensor var_667_strides_0 = const()[name = string("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = string("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = string("op_667_dilations_0"), val = tensor([1, 1])]; + int32 var_667_groups_0 = const()[name = string("op_667_groups_0"), val = int32(1)]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = var_650_to_fp16, x = var_646_cast_fp16_0)[name = string("op_667_cast_fp16")]; + tensor 
x_33_cast_fp16 = mul(x = var_662_cast_fp16, y = var_667_cast_fp16)[name = string("x_33_cast_fp16")]; + string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; + tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; + tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_649_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; + int32 var_680 = const()[name = string("op_680"), val = int32(1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_683_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_683_cast_fp16")]; + bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; + tensor x_37_cast_fp16 = concat(axis = var_680, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_683_cast_fp16))[name = string("x_37_cast_fp16")]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_693_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; + tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; + tensor var_699_split_sizes_0 = const()[name = string("op_699_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_699_axis_0 = const()[name = string("op_699_axis_0"), val = int32(1)]; + tensor var_699_cast_fp16_0, tensor var_699_cast_fp16_1 = split(axis = var_699_axis_0, split_sizes = var_699_split_sizes_0, x = out_27_cast_fp16)[name = string("op_699_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; + tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_721_to_fp16, x = var_699_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; + string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; + tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; + tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; + int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; + tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_732_to_fp16, x = var_699_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; + tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_743_to_fp16, x = var_699_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([1, 16, 64, 128])]; + tensor embed_9_cast_fp16 = reshape(shape = var_751, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([1, 2, 64, 128])]; + tensor var_756_cast_fp16 = reshape(shape = var_755, x = key_states_9_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, 2, 64, 128])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = value_states_9_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_767_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_767_cast_fp16")]; + tensor var_768_split_sizes_0 = const()[name = string("op_768_split_sizes_0"), val = tensor([32, 32])]; + int32 var_768_axis_0 = const()[name = string("op_768_axis_0"), val = int32(-2)]; + tensor 
var_768_cast_fp16_0, tensor var_768_cast_fp16_1 = split(axis = var_768_axis_0, split_sizes = var_768_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_768_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_770_cast_fp16 = mul(x = var_768_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_770_cast_fp16")]; + int32 var_772 = const()[name = string("op_772"), val = int32(-2)]; + bool var_773_interleave_0 = const()[name = string("op_773_interleave_0"), val = bool(false)]; + tensor var_773_cast_fp16 = concat(axis = var_772, interleave = var_773_interleave_0, values = (var_770_cast_fp16, var_768_cast_fp16_0))[name = string("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = mul(x = var_773_cast_fp16, y = sin_1_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_767_cast_fp16, y = var_774_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_756_cast_fp16)[name = string("transpose_11")]; + tensor var_777_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_778_split_sizes_0 = const()[name = string("op_778_split_sizes_0"), val = tensor([32, 32])]; + int32 var_778_axis_0 = const()[name = string("op_778_axis_0"), val = int32(-1)]; + tensor var_778_cast_fp16_0, tensor var_778_cast_fp16_1 = split(axis = var_778_axis_0, split_sizes = var_778_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_778_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_780_cast_fp16 = mul(x = var_778_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_780_cast_fp16")]; + int32 var_782 = const()[name = string("op_782"), val = int32(-1)]; + bool var_783_interleave_0 = const()[name = string("op_783_interleave_0"), val = bool(false)]; + tensor var_783_cast_fp16 = concat(axis = var_782, interleave = var_783_interleave_0, values = (var_780_cast_fp16, var_778_cast_fp16_0))[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_783_cast_fp16, y = sin_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor key_states_11_cast_fp16 = add(x = var_777_cast_fp16, y = var_784_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; + tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; + tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = 
concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_246, concat_20_values3_0))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_14)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache)[name = string("coreml_update_state_4")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_763_cast_fp16)[name = string("transpose_10")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_15)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache)[name = string("coreml_update_state_5")]; + tensor var_827_begin_0 = const()[name = string("op_827_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_827_end_0 = const()[name = string("op_827_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_827_end_mask_0 = const()[name = string("op_827_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_827_cast_fp16 = slice_by_index(begin = var_827_begin_0, end = var_827_end_0, end_mask 
= var_827_end_mask_0, x = coreml_update_state_16)[name = string("op_827_cast_fp16")]; + tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; + int32 var_830_axis_0 = const()[name = string("op_830_axis_0"), val = int32(1)]; + tensor var_830_cast_fp16_0, tensor var_830_cast_fp16_1 = split(axis = var_830_axis_0, split_sizes = tile_4, x = var_827_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = coreml_update_state_17)[name = string("op_837_cast_fp16")]; + tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; + int32 var_840_axis_0 = const()[name = string("op_840_axis_0"), val = int32(1)]; + tensor var_840_cast_fp16_0, tensor var_840_cast_fp16_1 = split(axis = var_840_axis_0, split_sizes = tile_5, x = var_837_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_843_split_sizes_0 = const()[name = string("op_843_split_sizes_0"), val = tensor([8, 8])]; + int32 var_843_axis_0 = const()[name = string("op_843_axis_0"), val = int32(1)]; + tensor var_843_cast_fp16_0, tensor var_843_cast_fp16_1 = split(axis = var_843_axis_0, split_sizes = var_843_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_843_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_830_cast_fp16_0, y = var_843_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; + fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; + tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(2)]; + tensor attn_weights_39_cast_fp16 = softmax(axis = var_850, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; + bool var_856_transpose_x_1 = const()[name = string("op_856_transpose_x_1"), val = bool(true)]; + bool var_856_transpose_y_1 = const()[name = string("op_856_transpose_y_1"), val = bool(false)]; + tensor var_856_cast_fp16 = matmul(transpose_x = var_856_transpose_x_1, transpose_y = var_856_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_840_cast_fp16_0)[name = string("op_856_cast_fp16")]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_830_cast_fp16_1, y = var_843_cast_fp16_1)[name = 
string("attn_weights_41_cast_fp16")]; + fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + int32 var_862 = const()[name = string("op_862"), val = int32(2)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_862, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; + bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_840_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; + int32 var_870 = const()[name = string("op_870"), val = int32(1)]; + bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; + tensor attn_output_15_cast_fp16 = concat(axis = var_870, interleave = attn_output_15_interleave_0, values = (var_856_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; + tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 1024, 1, 128])]; + tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_9")]; + tensor x_41_cast_fp16 = reshape(shape = var_879, x = var_874_cast_fp16)[name = string("x_41_cast_fp16")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; + tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_886_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_898 = const()[name = string("op_898"), val = int32(1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_901_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_901_cast_fp16")]; + bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; + tensor x_45_cast_fp16 = concat(axis = 
var_898, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_901_cast_fp16))[name = string("x_45_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_911_to_fp16 = const()[name = string("op_911_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_911_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; + tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_917_axis_0 = const()[name = string("op_917_axis_0"), val = int32(1)]; + tensor var_917_cast_fp16_0, tensor var_917_cast_fp16_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = out_33_cast_fp16)[name = string("op_917_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_922_to_fp16, x = var_917_cast_fp16_0)[name = string("input_5_cast_fp16")]; + tensor var_933_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_933_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = var_921_to_fp16, x = var_917_cast_fp16_0)[name = string("op_938_cast_fp16")]; + tensor x_51_cast_fp16 = mul(x = var_933_cast_fp16, y = var_938_cast_fp16)[name = string("x_51_cast_fp16")]; + string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; + tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; + tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; + tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_920_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_951 = const()[name = string("op_951"), val = int32(1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_954_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_954_cast_fp16")]; + bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; + tensor x_55_cast_fp16 = concat(axis = var_951, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_954_cast_fp16))[name = string("x_55_cast_fp16")]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_964_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; + tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; + tensor var_970_split_sizes_0 = const()[name = string("op_970_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_970_axis_0 = const()[name = string("op_970_axis_0"), val = int32(1)]; + tensor var_970_cast_fp16_0, tensor var_970_cast_fp16_1 = split(axis = var_970_axis_0, split_sizes = var_970_split_sizes_0, x = out_39_cast_fp16)[name = string("op_970_cast_fp16")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; + tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_992_to_fp16, x = var_970_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; + string key_states_13_pad_type_0 = 
const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; + tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1003_to_fp16, x = var_970_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor var_1014_to_fp16 = const()[name = string("op_1014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; + tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1014_to_fp16, x = var_970_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, 16, 64, 128])]; + tensor embed_13_cast_fp16 = reshape(shape = var_1022, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 2, 64, 128])]; + tensor var_1027_cast_fp16 = reshape(shape = var_1026, x = key_states_13_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 2, 64, 128])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = value_states_13_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1038_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_split_sizes_0 = const()[name = string("op_1039_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1039_axis_0 = const()[name = string("op_1039_axis_0"), val = int32(-2)]; + tensor var_1039_cast_fp16_0, tensor var_1039_cast_fp16_1 = split(axis = var_1039_axis_0, split_sizes = var_1039_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1039_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1041_cast_fp16 = mul(x = var_1039_cast_fp16_1, y = 
const_37_promoted_to_fp16)[name = string("op_1041_cast_fp16")]; + int32 var_1043 = const()[name = string("op_1043"), val = int32(-2)]; + bool var_1044_interleave_0 = const()[name = string("op_1044_interleave_0"), val = bool(false)]; + tensor var_1044_cast_fp16 = concat(axis = var_1043, interleave = var_1044_interleave_0, values = (var_1041_cast_fp16, var_1039_cast_fp16_0))[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = mul(x = var_1044_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1045_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1027_cast_fp16)[name = string("transpose_8")]; + tensor var_1048_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1049_split_sizes_0 = const()[name = string("op_1049_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1049_axis_0 = const()[name = string("op_1049_axis_0"), val = int32(-1)]; + tensor var_1049_cast_fp16_0, tensor var_1049_cast_fp16_1 = split(axis = var_1049_axis_0, split_sizes = var_1049_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1049_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1051_cast_fp16 = mul(x = var_1049_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1051_cast_fp16")]; + int32 var_1053 = const()[name = string("op_1053"), val = int32(-1)]; + bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; + tensor var_1054_cast_fp16 = concat(axis = var_1053, interleave = var_1054_interleave_0, values = (var_1051_cast_fp16, var_1049_cast_fp16_0))[name = string("op_1054_cast_fp16")]; + tensor var_1055_cast_fp16 = mul(x = var_1054_cast_fp16, y = sin_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor key_states_15_cast_fp16 = add(x = var_1048_cast_fp16, y = var_1055_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; + tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; + tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_246, concat_28_values3_0))[name = string("concat_28")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), 
val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_16)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache)[name = string("coreml_update_state_6")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1034_cast_fp16)[name = string("transpose_7")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_17)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache)[name = string("coreml_update_state_7")]; + tensor var_1098_begin_0 = const()[name = string("op_1098_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = string("op_1098_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1098_end_mask_0 = const()[name = string("op_1098_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = coreml_update_state_18)[name = string("op_1098_cast_fp16")]; + tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(1)]; + tensor var_1101_cast_fp16_0, tensor 
var_1101_cast_fp16_1 = split(axis = var_1101_axis_0, split_sizes = tile_6, x = var_1098_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = coreml_update_state_19)[name = string("op_1108_cast_fp16")]; + tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(1)]; + tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = tile_7, x = var_1108_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor var_1114_split_sizes_0 = const()[name = string("op_1114_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1114_axis_0 = const()[name = string("op_1114_axis_0"), val = int32(1)]; + tensor var_1114_cast_fp16_0, tensor var_1114_cast_fp16_1 = split(axis = var_1114_axis_0, split_sizes = var_1114_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1101_cast_fp16_0, y = var_1114_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; + fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; + tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(2)]; + tensor attn_weights_55_cast_fp16 = softmax(axis = var_1121, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; + bool var_1127_transpose_x_1 = const()[name = string("op_1127_transpose_x_1"), val = bool(true)]; + bool var_1127_transpose_y_1 = const()[name = string("op_1127_transpose_y_1"), val = bool(false)]; + tensor var_1127_cast_fp16 = matmul(transpose_x = var_1127_transpose_x_1, transpose_y = var_1127_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1111_cast_fp16_0)[name = string("op_1127_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1101_cast_fp16_1, y = var_1114_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; + fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_59_cast_fp16 = mul(x = 
attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; + tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + int32 var_1133 = const()[name = string("op_1133"), val = int32(2)]; + tensor attn_weights_63_cast_fp16 = softmax(axis = var_1133, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; + bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; + bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1111_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; + int32 var_1141 = const()[name = string("op_1141"), val = int32(1)]; + bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = concat(axis = var_1141, interleave = attn_output_21_interleave_0, values = (var_1127_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 1024, 1, 128])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_6")]; + tensor x_59_cast_fp16 = reshape(shape = var_1150, x = var_1145_cast_fp16)[name = string("x_59_cast_fp16")]; + string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; + tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; + tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1157_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; + int32 var_1169 = const()[name = string("op_1169"), val = int32(1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1172_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1172_cast_fp16")]; + bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; + tensor x_63_cast_fp16 = concat(axis = var_1169, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1172_cast_fp16))[name = string("x_63_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = 
tensor([1])]; + fp16 var_1182_to_fp16 = const()[name = string("op_1182_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1182_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; + tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; + tensor var_1188_split_sizes_0 = const()[name = string("op_1188_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1188_axis_0 = const()[name = string("op_1188_axis_0"), val = int32(1)]; + tensor var_1188_cast_fp16_0, tensor var_1188_cast_fp16_1 = split(axis = var_1188_axis_0, split_sizes = var_1188_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1188_cast_fp16")]; + string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; + tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; + int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; + tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1193_to_fp16, x = var_1188_cast_fp16_0)[name = string("input_7_cast_fp16")]; + tensor var_1204_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1204_cast_fp16")]; + string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; + tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; + tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; + int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; + tensor var_1209_cast_fp16 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = var_1192_to_fp16, x = var_1188_cast_fp16_0)[name = string("op_1209_cast_fp16")]; + tensor x_69_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1209_cast_fp16)[name = string("x_69_cast_fp16")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 
hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; + tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1191_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; + int32 var_1222 = const()[name = string("op_1222"), val = int32(1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1225_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1225_cast_fp16")]; + bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; + tensor x_73_cast_fp16 = concat(axis = var_1222, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1225_cast_fp16))[name = string("x_73_cast_fp16")]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_1235_to_fp16 = const()[name = string("op_1235_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1235_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; + tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; + tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(1)]; + tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1241_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; + tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1263_to_fp16, x = var_1241_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; + string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; + tensor key_states_17_strides_0 = const()[name = 
string("key_states_17_strides_0"), val = tensor([1, 1])]; + tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; + int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; + tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1274_to_fp16, x = var_1241_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; + tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1285_to_fp16, x = var_1241_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 16, 64, 128])]; + tensor embed_17_cast_fp16 = reshape(shape = var_1293, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 2, 64, 128])]; + tensor var_1298_cast_fp16 = reshape(shape = var_1297, x = key_states_17_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 2, 64, 128])]; + tensor var_1305_cast_fp16 = reshape(shape = var_1304, x = value_states_17_cast_fp16)[name = string("op_1305_cast_fp16")]; + tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1309_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310_split_sizes_0 = const()[name = string("op_1310_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1310_axis_0 = const()[name = string("op_1310_axis_0"), val = int32(-2)]; + tensor var_1310_cast_fp16_0, tensor var_1310_cast_fp16_1 = split(axis = var_1310_axis_0, split_sizes = var_1310_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1310_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1312_cast_fp16 = mul(x = var_1310_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1312_cast_fp16")]; + int32 var_1314 = const()[name = string("op_1314"), val = int32(-2)]; + 
bool var_1315_interleave_0 = const()[name = string("op_1315_interleave_0"), val = bool(false)]; + tensor var_1315_cast_fp16 = concat(axis = var_1314, interleave = var_1315_interleave_0, values = (var_1312_cast_fp16, var_1310_cast_fp16_0))[name = string("op_1315_cast_fp16")]; + tensor var_1316_cast_fp16 = mul(x = var_1315_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_1309_cast_fp16, y = var_1316_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1298_cast_fp16)[name = string("transpose_5")]; + tensor var_1319_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor var_1320_split_sizes_0 = const()[name = string("op_1320_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1320_axis_0 = const()[name = string("op_1320_axis_0"), val = int32(-1)]; + tensor var_1320_cast_fp16_0, tensor var_1320_cast_fp16_1 = split(axis = var_1320_axis_0, split_sizes = var_1320_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1320_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1322_cast_fp16 = mul(x = var_1320_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 var_1324 = const()[name = string("op_1324"), val = int32(-1)]; + bool var_1325_interleave_0 = const()[name = string("op_1325_interleave_0"), val = bool(false)]; + tensor var_1325_cast_fp16 = concat(axis = var_1324, interleave = var_1325_interleave_0, values = (var_1322_cast_fp16, var_1320_cast_fp16_0))[name = string("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = mul(x = var_1325_cast_fp16, y = sin_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor key_states_19_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1326_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; + tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_246, concat_36_values3_0))[name = string("concat_36")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = 
string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_18)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache)[name = string("coreml_update_state_8")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1305_cast_fp16)[name = string("transpose_4")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_19)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache)[name = string("coreml_update_state_9")]; + tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1369_end_0 = const()[name = string("op_1369_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = coreml_update_state_20)[name = string("op_1369_cast_fp16")]; + tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; + int32 var_1372_axis_0 = const()[name = string("op_1372_axis_0"), val = int32(1)]; + tensor var_1372_cast_fp16_0, tensor var_1372_cast_fp16_1 = split(axis = var_1372_axis_0, split_sizes = tile_8, x = var_1369_cast_fp16)[name = 
string("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = string("op_1379_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = coreml_update_state_21)[name = string("op_1379_cast_fp16")]; + tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; + int32 var_1382_axis_0 = const()[name = string("op_1382_axis_0"), val = int32(1)]; + tensor var_1382_cast_fp16_0, tensor var_1382_cast_fp16_1 = split(axis = var_1382_axis_0, split_sizes = tile_9, x = var_1379_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1385_split_sizes_0 = const()[name = string("op_1385_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1385_axis_0 = const()[name = string("op_1385_axis_0"), val = int32(1)]; + tensor var_1385_cast_fp16_0, tensor var_1385_cast_fp16_1 = split(axis = var_1385_axis_0, split_sizes = var_1385_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1385_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1372_cast_fp16_0, y = var_1385_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; + fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + int32 var_1392 = const()[name = string("op_1392"), val = int32(2)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_1392, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool var_1398_transpose_x_1 = const()[name = string("op_1398_transpose_x_1"), val = bool(true)]; + bool var_1398_transpose_y_1 = const()[name = string("op_1398_transpose_y_1"), val = bool(false)]; + tensor var_1398_cast_fp16 = matmul(transpose_x = var_1398_transpose_x_1, transpose_y = var_1398_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1382_cast_fp16_0)[name = string("op_1398_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1372_cast_fp16_1, y = var_1385_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; + fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = 
string("_inversed_attn_weights_75_cast_fp16")]; + tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + int32 var_1404 = const()[name = string("op_1404"), val = int32(2)]; + tensor attn_weights_79_cast_fp16 = softmax(axis = var_1404, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; + bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; + bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1382_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; + int32 var_1412 = const()[name = string("op_1412"), val = int32(1)]; + bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; + tensor attn_output_27_cast_fp16 = concat(axis = var_1412, interleave = attn_output_27_interleave_0, values = (var_1398_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; + tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1024, 1, 128])]; + tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_3")]; + tensor x_77_cast_fp16 = reshape(shape = var_1421, x = var_1416_cast_fp16)[name = string("x_77_cast_fp16")]; + string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; + tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; + tensor var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; + tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1428_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(1)]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1443_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1443_cast_fp16")]; + bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; + tensor x_81_cast_fp16 = concat(axis = var_1440, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1443_cast_fp16))[name = string("x_81_cast_fp16")]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), 
val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1453_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; + tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; + tensor var_1459_split_sizes_0 = const()[name = string("op_1459_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1459_axis_0 = const()[name = string("op_1459_axis_0"), val = int32(1)]; + tensor var_1459_cast_fp16_0, tensor var_1459_cast_fp16_1 = split(axis = var_1459_axis_0, split_sizes = var_1459_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1459_cast_fp16")]; + string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; + tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; + tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; + int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; + tensor var_1464_to_fp16 = const()[name = string("op_1464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; + tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1464_to_fp16, x = var_1459_cast_fp16_0)[name = string("input_9_cast_fp16")]; + tensor var_1475_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1475_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = var_1463_to_fp16, x = var_1459_cast_fp16_0)[name = string("op_1480_cast_fp16")]; + tensor x_87_cast_fp16 = mul(x = var_1475_cast_fp16, y = var_1480_cast_fp16)[name = string("x_87_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = 
int32(1)]; + tensor var_1462_to_fp16 = const()[name = string("op_1462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1462_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_1493 = const()[name = string("op_1493"), val = int32(1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1496_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1496_cast_fp16")]; + bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; + tensor x_91_cast_fp16 = concat(axis = var_1493, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1496_cast_fp16))[name = string("x_91_cast_fp16")]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1506_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; + tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; + tensor var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(1)]; + tensor var_1512_cast_fp16_0, tensor var_1512_cast_fp16_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1512_cast_fp16")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor var_1534_to_fp16 = const()[name = string("op_1534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; + tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1534_to_fp16, x = var_1512_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = 
const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; + tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1545_to_fp16, x = var_1512_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; + string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; + tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; + tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; + int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; + tensor var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; + tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1556_to_fp16, x = var_1512_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 16, 64, 128])]; + tensor embed_21_cast_fp16 = reshape(shape = var_1564, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; + tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 2, 64, 128])]; + tensor var_1569_cast_fp16 = reshape(shape = var_1568, x = key_states_21_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 2, 64, 128])]; + tensor var_1576_cast_fp16 = reshape(shape = var_1575, x = value_states_21_cast_fp16)[name = string("op_1576_cast_fp16")]; + tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1580_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1580_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-2)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1581_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1583_cast_fp16 = mul(x = var_1581_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1583_cast_fp16")]; + int32 var_1585 = const()[name = string("op_1585"), val = int32(-2)]; + bool var_1586_interleave_0 = const()[name = string("op_1586_interleave_0"), val = bool(false)]; + tensor 
var_1586_cast_fp16 = concat(axis = var_1585, interleave = var_1586_interleave_0, values = (var_1583_cast_fp16, var_1581_cast_fp16_0))[name = string("op_1586_cast_fp16")]; + tensor var_1587_cast_fp16 = mul(x = var_1586_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_1580_cast_fp16, y = var_1587_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_1569_cast_fp16)[name = string("transpose_2")]; + tensor var_1590_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor var_1591_split_sizes_0 = const()[name = string("op_1591_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1591_axis_0 = const()[name = string("op_1591_axis_0"), val = int32(-1)]; + tensor var_1591_cast_fp16_0, tensor var_1591_cast_fp16_1 = split(axis = var_1591_axis_0, split_sizes = var_1591_split_sizes_0, x = embed_cast_fp16)[name = string("op_1591_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1593_cast_fp16 = mul(x = var_1591_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + int32 var_1595 = const()[name = string("op_1595"), val = int32(-1)]; + bool var_1596_interleave_0 = const()[name = string("op_1596_interleave_0"), val = bool(false)]; + tensor var_1596_cast_fp16 = concat(axis = var_1595, interleave = var_1596_interleave_0, values = (var_1593_cast_fp16, var_1591_cast_fp16_0))[name = string("op_1596_cast_fp16")]; + tensor var_1597_cast_fp16 = mul(x = var_1596_cast_fp16, y = sin_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor key_states_cast_fp16 = add(x = var_1590_cast_fp16, y = var_1597_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; + tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; + tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_246, concat_44_values3_0))[name = string("concat_44")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = 
const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_cast_fp16, x = coreml_update_state_20)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache)[name = string("coreml_update_state_10")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_1576_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_cast_fp16, x = coreml_update_state_21)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache)[name = string("coreml_update_state_11")]; + tensor var_1640_begin_0 = const()[name = string("op_1640_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1640_end_0 = const()[name = string("op_1640_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1640_end_mask_0 = const()[name = string("op_1640_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = coreml_update_state_22)[name = string("op_1640_cast_fp16")]; + tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; + int32 var_1643_axis_0 = const()[name = string("op_1643_axis_0"), val = int32(1)]; + tensor var_1643_cast_fp16_0, tensor var_1643_cast_fp16_1 = split(axis = var_1643_axis_0, split_sizes = tile_10, x = var_1640_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = 
string("op_1650_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = coreml_update_state_23)[name = string("op_1650_cast_fp16")]; + tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; + int32 var_1653_axis_0 = const()[name = string("op_1653_axis_0"), val = int32(1)]; + tensor var_1653_cast_fp16_0, tensor var_1653_cast_fp16_1 = split(axis = var_1653_axis_0, split_sizes = tile_11, x = var_1650_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1656_split_sizes_0 = const()[name = string("op_1656_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1656_axis_0 = const()[name = string("op_1656_axis_0"), val = int32(1)]; + tensor var_1656_cast_fp16_0, tensor var_1656_cast_fp16_1 = split(axis = var_1656_axis_0, split_sizes = var_1656_split_sizes_0, x = query_states_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1643_cast_fp16_0, y = var_1656_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; + fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; + tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(2)]; + tensor attn_weights_87_cast_fp16 = softmax(axis = var_1663, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; + bool var_1669_transpose_x_1 = const()[name = string("op_1669_transpose_x_1"), val = bool(true)]; + bool var_1669_transpose_y_1 = const()[name = string("op_1669_transpose_y_1"), val = bool(false)]; + tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_1, transpose_y = var_1669_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1653_cast_fp16_0)[name = string("op_1669_cast_fp16")]; + bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; + bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; + tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1643_cast_fp16_1, y = var_1656_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; + fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; + int32 
var_1675 = const()[name = string("op_1675"), val = int32(2)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_1675, x = attn_weights_93_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; + bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_cast_fp16, y = var_1653_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; + int32 var_1683 = const()[name = string("op_1683"), val = int32(1)]; + bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; + tensor attn_output_33_cast_fp16 = concat(axis = var_1683, interleave = attn_output_33_interleave_0, values = (var_1669_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; + tensor var_1687_perm_0 = const()[name = string("op_1687_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 1024, 1, 128])]; + tensor var_1687_cast_fp16 = transpose(perm = var_1687_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_0")]; + tensor x_95_cast_fp16 = reshape(shape = var_1692, x = var_1687_cast_fp16)[name = string("x_95_cast_fp16")]; + string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; + tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; + tensor var_1699_to_fp16 = const()[name = string("op_1699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; + tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1699_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; + int32 var_1711 = const()[name = string("op_1711"), val = int32(1)]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1714_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1714_cast_fp16")]; + bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; + tensor x_99_cast_fp16 = concat(axis = var_1711, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1714_cast_fp16))[name = string("x_99_cast_fp16")]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1724_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor 
layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; + tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; + tensor var_1730_split_sizes_0 = const()[name = string("op_1730_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1730_axis_0 = const()[name = string("op_1730_axis_0"), val = int32(1)]; + tensor var_1730_cast_fp16_0, tensor var_1730_cast_fp16_1 = split(axis = var_1730_axis_0, split_sizes = var_1730_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1730_cast_fp16")]; + string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; + tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; + tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; + int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; + tensor var_1735_to_fp16 = const()[name = string("op_1735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; + tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_1735_to_fp16, x = var_1730_cast_fp16_0)[name = string("input_cast_fp16")]; + tensor var_1746_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_1746_cast_fp16")]; + string var_1751_pad_type_0 = const()[name = string("op_1751_pad_type_0"), val = string("valid")]; + tensor var_1751_strides_0 = const()[name = string("op_1751_strides_0"), val = tensor([1, 1])]; + tensor var_1751_pad_0 = const()[name = string("op_1751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1751_dilations_0 = const()[name = string("op_1751_dilations_0"), val = tensor([1, 1])]; + int32 var_1751_groups_0 = const()[name = string("op_1751_groups_0"), val = int32(1)]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; + tensor var_1751_cast_fp16 = conv(dilations = var_1751_dilations_0, groups = var_1751_groups_0, pad = var_1751_pad_0, pad_type = var_1751_pad_type_0, strides = var_1751_strides_0, weight = var_1734_to_fp16, x = var_1730_cast_fp16_0)[name = string("op_1751_cast_fp16")]; + tensor x_105_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1751_cast_fp16)[name = string("x_105_cast_fp16")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; + tensor hidden_states_cast_fp16 = conv(dilations = 
hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_1733_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_107_cast_fp16")]; + int32 var_1764 = const()[name = string("op_1764"), val = int32(1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1767_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1767_cast_fp16")]; + bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; + tensor x_109_cast_fp16 = concat(axis = var_1764, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1767_cast_fp16))[name = string("x_109_cast_fp16")]; + tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; + fp16 var_1777_to_fp16 = const()[name = string("op_1777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1777_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; + tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; + tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_75_cast_fp16")]; + tensor var_1783_split_sizes_0 = const()[name = string("op_1783_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(1)]; + tensor output, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = var_1783_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1783_cast_fp16")]; + } -> (output); + func length_16(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { + tensor var_80 = const()[name = string("op_80"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188322240)))]; + tensor position_ids_1 = add(x = var_80, y = position_id)[name = string("position_ids_1")]; + int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; + bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; + tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; + tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; + int32 sin_batch_dims_0 = const()[name = 
string("sin_batch_dims_0"), val = int32(0)]; + bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; + int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; + tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; + tensor var_105 = const()[name = string("op_105"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; + tensor var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor([2])]; + tensor var_112 = expand_dims(axes = var_112_axes_0, x = position_ids_1)[name = string("op_112")]; + tensor var_113 = greater(x = var_105, y = var_112)[name = string("op_113")]; + tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; + string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_113_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_113)[name = string("cast_65")]; + tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_113_to_fp16)[name = string("attention_mask_3_cast_fp16")]; + fp16 var_121_promoted_to_fp16 = const()[name = string("op_121_promoted_to_fp16"), val = fp16(0x0p+0)]; + tensor var_122_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_121_promoted_to_fp16)[name = string("op_122_cast_fp16")]; + tensor var_123_after_broadcast_to_fp16 = const()[name = string("op_123_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188322368)))]; + tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_123_after_broadcast_to_fp16, cond = var_122_cast_fp16)[name = string("attention_mask_cast_fp16")]; + tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; + tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; + tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; + int32 var_138 = const()[name = string("op_138"), val = int32(1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_141_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_141_cast_fp16")]; + bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; + tensor x_1_cast_fp16 = concat(axis = var_138, interleave = x_1_interleave_0, values = (inputs_embeds, var_141_cast_fp16))[name = string("x_1_cast_fp16")]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_151_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(8401216)))]; + tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; + tensor var_157_split_sizes_0 = const()[name = string("op_157_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_157_axis_0 = const()[name = string("op_157_axis_0"), val = int32(1)]; + tensor var_157_cast_fp16_0, tensor var_157_cast_fp16_1 = split(axis = var_157_axis_0, split_sizes = var_157_split_sizes_0, x = out_3_cast_fp16)[name = string("op_157_cast_fp16")]; + tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([16])]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; + tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_179_to_fp16, x = var_157_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; + tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_190_to_fp16, x = var_157_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor var_201_to_fp16 = const()[name = string("op_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; + tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = 
value_states_1_strides_0, weight = var_201_to_fp16, x = var_157_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 16, 64, 16])]; + tensor embed_1_cast_fp16 = reshape(shape = var_209, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 2, 64, 16])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_states_1_cast_fp16)[name = string("op_214_cast_fp16")]; + tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_220 = const()[name = string("op_220"), val = tensor([1, 2, 64, 16])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = value_states_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_20")]; + tensor var_225_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226_split_sizes_0 = const()[name = string("op_226_split_sizes_0"), val = tensor([32, 32])]; + int32 var_226_axis_0 = const()[name = string("op_226_axis_0"), val = int32(-2)]; + tensor var_226_cast_fp16_0, tensor var_226_cast_fp16_1 = split(axis = var_226_axis_0, split_sizes = var_226_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_226_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_228_cast_fp16 = mul(x = var_226_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_228_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-2)]; + bool var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_230, interleave = var_231_interleave_0, values = (var_228_cast_fp16, var_226_cast_fp16_0))[name = string("op_231_cast_fp16")]; + tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_19")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_225_cast_fp16, y = var_232_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_214_cast_fp16)[name = string("transpose_17")]; + tensor var_235_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor var_236_split_sizes_0 = const()[name = string("op_236_split_sizes_0"), val = tensor([32, 32])]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(-1)]; + tensor var_236_cast_fp16_0, tensor var_236_cast_fp16_1 = split(axis = var_236_axis_0, split_sizes = var_236_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_236_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_238_cast_fp16 = mul(x = var_236_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_238_cast_fp16")]; + int32 var_240 = const()[name = string("op_240"), val = int32(-1)]; + bool var_241_interleave_0 = const()[name = string("op_241_interleave_0"), val = bool(false)]; + tensor var_241_cast_fp16 = concat(axis = var_240, interleave = var_241_interleave_0, values = 
(var_238_cast_fp16, var_236_cast_fp16_0))[name = string("op_241_cast_fp16")]; + tensor var_242_cast_fp16 = mul(x = var_241_cast_fp16, y = sin_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_235_cast_fp16, y = var_242_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_246 = add(x = position_id, y = q_len_1)[name = string("op_246")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; + tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; + tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_246, concat_4_values3_0))[name = string("concat_4")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_36_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache)[name = string("coreml_update_state_36")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = 
tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_221_cast_fp16)[name = string("transpose_16")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_37_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache)[name = string("coreml_update_state_37")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_0 = const()[name = string("op_285_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = coreml_update_state_12)[name = string("op_285_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; + int32 var_288_axis_0 = const()[name = string("op_288_axis_0"), val = int32(1)]; + tensor var_288_cast_fp16_0, tensor var_288_cast_fp16_1 = split(axis = var_288_axis_0, split_sizes = tile_0, x = var_285_cast_fp16)[name = string("op_288_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = coreml_update_state_13)[name = string("op_295_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; + int32 var_298_axis_0 = const()[name = string("op_298_axis_0"), val = int32(1)]; + tensor var_298_cast_fp16_0, tensor var_298_cast_fp16_1 = split(axis = var_298_axis_0, split_sizes = tile_1, x = var_295_cast_fp16)[name = string("op_298_cast_fp16")]; + tensor var_301_split_sizes_0 = const()[name = string("op_301_split_sizes_0"), val = tensor([8, 8])]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + tensor var_301_cast_fp16_0, tensor var_301_cast_fp16_1 = split(axis = var_301_axis_0, split_sizes = var_301_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_301_cast_fp16")]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = 
attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_288_cast_fp16_0, y = var_301_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; + fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; + tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_18")]; + tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + int32 var_308 = const()[name = string("op_308"), val = int32(2)]; + tensor attn_weights_7_cast_fp16 = softmax(axis = var_308, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool var_314_transpose_x_1 = const()[name = string("op_314_transpose_x_1"), val = bool(true)]; + bool var_314_transpose_y_1 = const()[name = string("op_314_transpose_y_1"), val = bool(false)]; + tensor var_314_cast_fp16 = matmul(transpose_x = var_314_transpose_x_1, transpose_y = var_314_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_298_cast_fp16_0)[name = string("op_314_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_288_cast_fp16_1, y = var_301_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; + fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; + tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(2)]; + tensor attn_weights_15_cast_fp16 = softmax(axis = var_320, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; + bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_298_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; + int32 var_328 = const()[name = string("op_328"), val = int32(1)]; + bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; + tensor attn_output_3_cast_fp16 = concat(axis = var_328, interleave = attn_output_3_interleave_0, values = (var_314_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; + tensor var_332_perm_0 = const()[name = string("op_332_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 1024, 1, 16])]; + tensor var_332_cast_fp16 = transpose(perm = var_332_perm_0, x = 
attn_output_3_cast_fp16)[name = string("transpose_15")]; + tensor x_5_cast_fp16 = reshape(shape = var_337, x = var_332_cast_fp16)[name = string("x_5_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_344_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; + int32 var_356 = const()[name = string("op_356"), val = int32(1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_359_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_359_cast_fp16")]; + bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; + tensor x_9_cast_fp16 = concat(axis = var_356, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_359_cast_fp16))[name = string("x_9_cast_fp16")]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; + tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; + tensor var_375_split_sizes_0 = const()[name = string("op_375_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_375_axis_0 = const()[name = string("op_375_axis_0"), val = int32(1)]; + tensor var_375_cast_fp16_0, tensor var_375_cast_fp16_1 = split(axis = var_375_axis_0, split_sizes = var_375_split_sizes_0, x = out_9_cast_fp16)[name = string("op_375_cast_fp16")]; + string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; + tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; + tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; + int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(13128384)))]; + tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_380_to_fp16, x = var_375_cast_fp16_0)[name = string("input_1_cast_fp16")]; + tensor var_391_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_391_cast_fp16")]; + string var_396_pad_type_0 = const()[name = string("op_396_pad_type_0"), val = string("valid")]; + tensor var_396_strides_0 = const()[name = string("op_396_strides_0"), val = tensor([1, 1])]; + tensor var_396_pad_0 = const()[name = string("op_396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_396_dilations_0 = const()[name = string("op_396_dilations_0"), val = tensor([1, 1])]; + int32 var_396_groups_0 = const()[name = string("op_396_groups_0"), val = int32(1)]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; + tensor var_396_cast_fp16 = conv(dilations = var_396_dilations_0, groups = var_396_groups_0, pad = var_396_pad_0, pad_type = var_396_pad_type_0, strides = var_396_strides_0, weight = var_379_to_fp16, x = var_375_cast_fp16_0)[name = string("op_396_cast_fp16")]; + tensor x_15_cast_fp16 = mul(x = var_391_cast_fp16, y = var_396_cast_fp16)[name = string("x_15_cast_fp16")]; + string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; + tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; + tensor var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; + tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_378_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; + int32 var_409 = const()[name = string("op_409"), val = int32(1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_412_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_412_cast_fp16")]; + bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; + tensor x_19_cast_fp16 = concat(axis = var_409, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_412_cast_fp16))[name = string("x_19_cast_fp16")]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_422_to_fp16 = const()[name = string("op_422_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_422_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; + tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; + tensor var_428_split_sizes_0 = const()[name = string("op_428_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_428_axis_0 = const()[name = string("op_428_axis_0"), val = int32(1)]; + tensor var_428_cast_fp16_0, tensor var_428_cast_fp16_1 = split(axis = var_428_axis_0, split_sizes = var_428_split_sizes_0, x = out_15_cast_fp16)[name = string("op_428_cast_fp16")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; + tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_450_to_fp16, x = var_428_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; + string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; + tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; + tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; + int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; + tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_461_to_fp16, x = var_428_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; + string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; + tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; + tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; + int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; + tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = 
value_states_5_strides_0, weight = var_472_to_fp16, x = var_428_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 16, 64, 16])]; + tensor embed_5_cast_fp16 = reshape(shape = var_480, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 2, 64, 16])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = key_states_5_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_491 = const()[name = string("op_491"), val = tensor([1, 2, 64, 16])]; + tensor var_492_cast_fp16 = reshape(shape = var_491, x = value_states_5_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_496_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor var_497_split_sizes_0 = const()[name = string("op_497_split_sizes_0"), val = tensor([32, 32])]; + int32 var_497_axis_0 = const()[name = string("op_497_axis_0"), val = int32(-2)]; + tensor var_497_cast_fp16_0, tensor var_497_cast_fp16_1 = split(axis = var_497_axis_0, split_sizes = var_497_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_497_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_499_cast_fp16")]; + int32 var_501 = const()[name = string("op_501"), val = int32(-2)]; + bool var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; + tensor var_502_cast_fp16 = concat(axis = var_501, interleave = var_502_interleave_0, values = (var_499_cast_fp16, var_497_cast_fp16_0))[name = string("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = sin_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_496_cast_fp16, y = var_503_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_485_cast_fp16)[name = string("transpose_14")]; + tensor var_506_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor([32, 32])]; + int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; + tensor var_507_cast_fp16_0, tensor var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_507_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_509_cast_fp16")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + bool var_512_interleave_0 = const()[name = string("op_512_interleave_0"), val = bool(false)]; + tensor var_512_cast_fp16 = concat(axis = var_511, interleave = var_512_interleave_0, values = (var_509_cast_fp16, var_507_cast_fp16_0))[name = string("op_512_cast_fp16")]; + tensor var_513_cast_fp16 = mul(x = var_512_cast_fp16, y = sin_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor key_states_7_cast_fp16 = 
add(x = var_506_cast_fp16, y = var_513_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_246, concat_12_values3_0))[name = string("concat_12")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_12)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_38_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache)[name = string("coreml_update_state_38")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_7_cast_fp16 = 
transpose(perm = value_states_7_perm_0, x = var_492_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_13)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_39_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache)[name = string("coreml_update_state_39")]; + tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_14)[name = string("op_556_cast_fp16")]; + tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; + int32 var_559_axis_0 = const()[name = string("op_559_axis_0"), val = int32(1)]; + tensor var_559_cast_fp16_0, tensor var_559_cast_fp16_1 = split(axis = var_559_axis_0, split_sizes = tile_2, x = var_556_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_566_begin_0 = const()[name = string("op_566_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_566_end_0 = const()[name = string("op_566_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_566_end_mask_0 = const()[name = string("op_566_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = coreml_update_state_15)[name = string("op_566_cast_fp16")]; + tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; + int32 var_569_axis_0 = const()[name = string("op_569_axis_0"), val = int32(1)]; + tensor var_569_cast_fp16_0, tensor var_569_cast_fp16_1 = split(axis = var_569_axis_0, split_sizes = tile_3, x = var_566_cast_fp16)[name = string("op_569_cast_fp16")]; + tensor var_572_split_sizes_0 = const()[name = string("op_572_split_sizes_0"), val = tensor([8, 8])]; + int32 var_572_axis_0 = const()[name = string("op_572_axis_0"), val = int32(1)]; + tensor var_572_cast_fp16_0, tensor var_572_cast_fp16_1 = split(axis = var_572_axis_0, split_sizes = var_572_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_572_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_559_cast_fp16_0, y = var_572_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; + fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = 
_inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + int32 var_579 = const()[name = string("op_579"), val = int32(2)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_579, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool var_585_transpose_x_1 = const()[name = string("op_585_transpose_x_1"), val = bool(true)]; + bool var_585_transpose_y_1 = const()[name = string("op_585_transpose_y_1"), val = bool(false)]; + tensor var_585_cast_fp16 = matmul(transpose_x = var_585_transpose_x_1, transpose_y = var_585_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_569_cast_fp16_0)[name = string("op_585_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_559_cast_fp16_1, y = var_572_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; + fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; + tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + int32 var_591 = const()[name = string("op_591"), val = int32(2)]; + tensor attn_weights_31_cast_fp16 = softmax(axis = var_591, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; + bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_569_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; + int32 var_599 = const()[name = string("op_599"), val = int32(1)]; + bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; + tensor attn_output_9_cast_fp16 = concat(axis = var_599, interleave = attn_output_9_interleave_0, values = (var_585_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; + tensor var_603_perm_0 = const()[name = string("op_603_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_608 = const()[name = string("op_608"), val = tensor([1, 1024, 1, 16])]; + tensor var_603_cast_fp16 = transpose(perm = var_603_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_12")]; + tensor x_23_cast_fp16 = reshape(shape = var_608, x = var_603_cast_fp16)[name = string("x_23_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_615_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_627 = const()[name = string("op_627"), val = int32(1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_630_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_630_cast_fp16")]; + bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; + tensor x_27_cast_fp16 = concat(axis = var_627, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_630_cast_fp16))[name = string("x_27_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_640_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; + tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; + tensor var_646_split_sizes_0 = const()[name = string("op_646_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_646_axis_0 = const()[name = string("op_646_axis_0"), val = int32(1)]; + tensor var_646_cast_fp16_0, tensor var_646_cast_fp16_1 = split(axis = var_646_axis_0, split_sizes = var_646_split_sizes_0, x = out_21_cast_fp16)[name = string("op_646_cast_fp16")]; + string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; + tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; + tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; + int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; + tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_651_to_fp16, x = var_646_cast_fp16_0)[name = string("input_3_cast_fp16")]; + tensor var_662_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_662_cast_fp16")]; + string var_667_pad_type_0 = const()[name = string("op_667_pad_type_0"), val = string("valid")]; + 
tensor var_667_strides_0 = const()[name = string("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = string("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = string("op_667_dilations_0"), val = tensor([1, 1])]; + int32 var_667_groups_0 = const()[name = string("op_667_groups_0"), val = int32(1)]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = var_650_to_fp16, x = var_646_cast_fp16_0)[name = string("op_667_cast_fp16")]; + tensor x_33_cast_fp16 = mul(x = var_662_cast_fp16, y = var_667_cast_fp16)[name = string("x_33_cast_fp16")]; + string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; + tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; + tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_649_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; + int32 var_680 = const()[name = string("op_680"), val = int32(1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_683_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_683_cast_fp16")]; + bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; + tensor x_37_cast_fp16 = concat(axis = var_680, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_683_cast_fp16))[name = string("x_37_cast_fp16")]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_693_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; + tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; + tensor var_699_split_sizes_0 = const()[name = string("op_699_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_699_axis_0 = const()[name = string("op_699_axis_0"), val = int32(1)]; + tensor var_699_cast_fp16_0, tensor 
var_699_cast_fp16_1 = split(axis = var_699_axis_0, split_sizes = var_699_split_sizes_0, x = out_27_cast_fp16)[name = string("op_699_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; + tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_721_to_fp16, x = var_699_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; + string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; + tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; + tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; + int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; + tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_732_to_fp16, x = var_699_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; + tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_743_to_fp16, x = var_699_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([1, 16, 64, 16])]; + tensor embed_9_cast_fp16 = reshape(shape = var_751, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([1, 2, 64, 16])]; + tensor var_756_cast_fp16 = reshape(shape = var_755, x = 
key_states_9_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, 2, 64, 16])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = value_states_9_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_767_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_767_cast_fp16")]; + tensor var_768_split_sizes_0 = const()[name = string("op_768_split_sizes_0"), val = tensor([32, 32])]; + int32 var_768_axis_0 = const()[name = string("op_768_axis_0"), val = int32(-2)]; + tensor var_768_cast_fp16_0, tensor var_768_cast_fp16_1 = split(axis = var_768_axis_0, split_sizes = var_768_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_768_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_770_cast_fp16 = mul(x = var_768_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_770_cast_fp16")]; + int32 var_772 = const()[name = string("op_772"), val = int32(-2)]; + bool var_773_interleave_0 = const()[name = string("op_773_interleave_0"), val = bool(false)]; + tensor var_773_cast_fp16 = concat(axis = var_772, interleave = var_773_interleave_0, values = (var_770_cast_fp16, var_768_cast_fp16_0))[name = string("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = mul(x = var_773_cast_fp16, y = sin_1_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_767_cast_fp16, y = var_774_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_756_cast_fp16)[name = string("transpose_11")]; + tensor var_777_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_778_split_sizes_0 = const()[name = string("op_778_split_sizes_0"), val = tensor([32, 32])]; + int32 var_778_axis_0 = const()[name = string("op_778_axis_0"), val = int32(-1)]; + tensor var_778_cast_fp16_0, tensor var_778_cast_fp16_1 = split(axis = var_778_axis_0, split_sizes = var_778_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_778_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_780_cast_fp16 = mul(x = var_778_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_780_cast_fp16")]; + int32 var_782 = const()[name = string("op_782"), val = int32(-1)]; + bool var_783_interleave_0 = const()[name = string("op_783_interleave_0"), val = bool(false)]; + tensor var_783_cast_fp16 = concat(axis = var_782, interleave = var_783_interleave_0, values = (var_780_cast_fp16, var_778_cast_fp16_0))[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_783_cast_fp16, y = sin_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor key_states_11_cast_fp16 = add(x = var_777_cast_fp16, y = var_784_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + 
int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; + tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; + tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_246, concat_20_values3_0))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_14)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_40_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache)[name = string("coreml_update_state_40")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_763_cast_fp16)[name = string("transpose_10")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, 
update = value_states_11_cast_fp16, x = coreml_update_state_15)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_41_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache)[name = string("coreml_update_state_41")]; + tensor var_827_begin_0 = const()[name = string("op_827_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_827_end_0 = const()[name = string("op_827_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_827_end_mask_0 = const()[name = string("op_827_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_827_cast_fp16 = slice_by_index(begin = var_827_begin_0, end = var_827_end_0, end_mask = var_827_end_mask_0, x = coreml_update_state_16)[name = string("op_827_cast_fp16")]; + tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; + int32 var_830_axis_0 = const()[name = string("op_830_axis_0"), val = int32(1)]; + tensor var_830_cast_fp16_0, tensor var_830_cast_fp16_1 = split(axis = var_830_axis_0, split_sizes = tile_4, x = var_827_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = coreml_update_state_17)[name = string("op_837_cast_fp16")]; + tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; + int32 var_840_axis_0 = const()[name = string("op_840_axis_0"), val = int32(1)]; + tensor var_840_cast_fp16_0, tensor var_840_cast_fp16_1 = split(axis = var_840_axis_0, split_sizes = tile_5, x = var_837_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_843_split_sizes_0 = const()[name = string("op_843_split_sizes_0"), val = tensor([8, 8])]; + int32 var_843_axis_0 = const()[name = string("op_843_axis_0"), val = int32(1)]; + tensor var_843_cast_fp16_0, tensor var_843_cast_fp16_1 = split(axis = var_843_axis_0, split_sizes = var_843_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_843_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_830_cast_fp16_0, y = var_843_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; + fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; + tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(2)]; + tensor attn_weights_39_cast_fp16 = softmax(axis = var_850, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; + bool 
var_856_transpose_x_1 = const()[name = string("op_856_transpose_x_1"), val = bool(true)]; + bool var_856_transpose_y_1 = const()[name = string("op_856_transpose_y_1"), val = bool(false)]; + tensor var_856_cast_fp16 = matmul(transpose_x = var_856_transpose_x_1, transpose_y = var_856_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_840_cast_fp16_0)[name = string("op_856_cast_fp16")]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_830_cast_fp16_1, y = var_843_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; + fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + int32 var_862 = const()[name = string("op_862"), val = int32(2)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_862, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; + bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_840_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; + int32 var_870 = const()[name = string("op_870"), val = int32(1)]; + bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; + tensor attn_output_15_cast_fp16 = concat(axis = var_870, interleave = attn_output_15_interleave_0, values = (var_856_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; + tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 1024, 1, 16])]; + tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_9")]; + tensor x_41_cast_fp16 = reshape(shape = var_879, x = var_874_cast_fp16)[name = string("x_41_cast_fp16")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; + tensor hidden_states_15_cast_fp16 = conv(dilations = 
hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_886_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_898 = const()[name = string("op_898"), val = int32(1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_901_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_901_cast_fp16")]; + bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; + tensor x_45_cast_fp16 = concat(axis = var_898, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_901_cast_fp16))[name = string("x_45_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_911_to_fp16 = const()[name = string("op_911_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_911_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; + tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_917_axis_0 = const()[name = string("op_917_axis_0"), val = int32(1)]; + tensor var_917_cast_fp16_0, tensor var_917_cast_fp16_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = out_33_cast_fp16)[name = string("op_917_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_922_to_fp16, x = var_917_cast_fp16_0)[name = string("input_5_cast_fp16")]; + tensor var_933_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_933_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor var_921_to_fp16 = const()[name = 
string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = var_921_to_fp16, x = var_917_cast_fp16_0)[name = string("op_938_cast_fp16")]; + tensor x_51_cast_fp16 = mul(x = var_933_cast_fp16, y = var_938_cast_fp16)[name = string("x_51_cast_fp16")]; + string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; + tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; + tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; + tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_920_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_951 = const()[name = string("op_951"), val = int32(1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_954_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_954_cast_fp16")]; + bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; + tensor x_55_cast_fp16 = concat(axis = var_951, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_954_cast_fp16))[name = string("x_55_cast_fp16")]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_964_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; + tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; + tensor var_970_split_sizes_0 = const()[name = string("op_970_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_970_axis_0 = const()[name = string("op_970_axis_0"), val = int32(1)]; + tensor var_970_cast_fp16_0, tensor var_970_cast_fp16_1 = split(axis = var_970_axis_0, split_sizes = var_970_split_sizes_0, x = out_39_cast_fp16)[name = string("op_970_cast_fp16")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = 
const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; + tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_992_to_fp16, x = var_970_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; + tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1003_to_fp16, x = var_970_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor var_1014_to_fp16 = const()[name = string("op_1014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; + tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1014_to_fp16, x = var_970_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, 16, 64, 16])]; + tensor embed_13_cast_fp16 = reshape(shape = var_1022, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 2, 64, 16])]; + tensor var_1027_cast_fp16 = reshape(shape = var_1026, x = key_states_13_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 2, 64, 16])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = value_states_13_cast_fp16)[name = 
string("op_1034_cast_fp16")]; + tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1038_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_split_sizes_0 = const()[name = string("op_1039_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1039_axis_0 = const()[name = string("op_1039_axis_0"), val = int32(-2)]; + tensor var_1039_cast_fp16_0, tensor var_1039_cast_fp16_1 = split(axis = var_1039_axis_0, split_sizes = var_1039_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1039_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1041_cast_fp16 = mul(x = var_1039_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1041_cast_fp16")]; + int32 var_1043 = const()[name = string("op_1043"), val = int32(-2)]; + bool var_1044_interleave_0 = const()[name = string("op_1044_interleave_0"), val = bool(false)]; + tensor var_1044_cast_fp16 = concat(axis = var_1043, interleave = var_1044_interleave_0, values = (var_1041_cast_fp16, var_1039_cast_fp16_0))[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = mul(x = var_1044_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1045_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1027_cast_fp16)[name = string("transpose_8")]; + tensor var_1048_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1049_split_sizes_0 = const()[name = string("op_1049_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1049_axis_0 = const()[name = string("op_1049_axis_0"), val = int32(-1)]; + tensor var_1049_cast_fp16_0, tensor var_1049_cast_fp16_1 = split(axis = var_1049_axis_0, split_sizes = var_1049_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1049_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1051_cast_fp16 = mul(x = var_1049_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1051_cast_fp16")]; + int32 var_1053 = const()[name = string("op_1053"), val = int32(-1)]; + bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; + tensor var_1054_cast_fp16 = concat(axis = var_1053, interleave = var_1054_interleave_0, values = (var_1051_cast_fp16, var_1049_cast_fp16_0))[name = string("op_1054_cast_fp16")]; + tensor var_1055_cast_fp16 = mul(x = var_1054_cast_fp16, y = sin_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor key_states_15_cast_fp16 = add(x = var_1048_cast_fp16, y = var_1055_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = 
concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; + tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; + tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_246, concat_28_values3_0))[name = string("concat_28")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_16)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_42_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache)[name = string("coreml_update_state_42")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1034_cast_fp16)[name = string("transpose_7")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_17)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = 
string("coreml_update_state_43_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache)[name = string("coreml_update_state_43")]; + tensor var_1098_begin_0 = const()[name = string("op_1098_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = string("op_1098_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1098_end_mask_0 = const()[name = string("op_1098_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = coreml_update_state_18)[name = string("op_1098_cast_fp16")]; + tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(1)]; + tensor var_1101_cast_fp16_0, tensor var_1101_cast_fp16_1 = split(axis = var_1101_axis_0, split_sizes = tile_6, x = var_1098_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = coreml_update_state_19)[name = string("op_1108_cast_fp16")]; + tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(1)]; + tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = tile_7, x = var_1108_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor var_1114_split_sizes_0 = const()[name = string("op_1114_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1114_axis_0 = const()[name = string("op_1114_axis_0"), val = int32(1)]; + tensor var_1114_cast_fp16_0, tensor var_1114_cast_fp16_1 = split(axis = var_1114_axis_0, split_sizes = var_1114_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1101_cast_fp16_0, y = var_1114_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; + fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; + tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(2)]; + tensor attn_weights_55_cast_fp16 = softmax(axis = var_1121, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; + bool var_1127_transpose_x_1 = const()[name = string("op_1127_transpose_x_1"), val = bool(true)]; + bool var_1127_transpose_y_1 = const()[name = string("op_1127_transpose_y_1"), val = bool(false)]; + 
tensor var_1127_cast_fp16 = matmul(transpose_x = var_1127_transpose_x_1, transpose_y = var_1127_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1111_cast_fp16_0)[name = string("op_1127_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1101_cast_fp16_1, y = var_1114_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; + fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; + tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + int32 var_1133 = const()[name = string("op_1133"), val = int32(2)]; + tensor attn_weights_63_cast_fp16 = softmax(axis = var_1133, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; + bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; + bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1111_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; + int32 var_1141 = const()[name = string("op_1141"), val = int32(1)]; + bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = concat(axis = var_1141, interleave = attn_output_21_interleave_0, values = (var_1127_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 1024, 1, 16])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_6")]; + tensor x_59_cast_fp16 = reshape(shape = var_1150, x = var_1145_cast_fp16)[name = string("x_59_cast_fp16")]; + string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; + tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; + tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = 
hidden_states_21_strides_0, weight = var_1157_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; + int32 var_1169 = const()[name = string("op_1169"), val = int32(1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1172_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1172_cast_fp16")]; + bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; + tensor x_63_cast_fp16 = concat(axis = var_1169, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1172_cast_fp16))[name = string("x_63_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_1182_to_fp16 = const()[name = string("op_1182_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1182_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; + tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; + tensor var_1188_split_sizes_0 = const()[name = string("op_1188_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1188_axis_0 = const()[name = string("op_1188_axis_0"), val = int32(1)]; + tensor var_1188_cast_fp16_0, tensor var_1188_cast_fp16_1 = split(axis = var_1188_axis_0, split_sizes = var_1188_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1188_cast_fp16")]; + string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; + tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; + int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; + tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1193_to_fp16, x = var_1188_cast_fp16_0)[name = string("input_7_cast_fp16")]; + tensor var_1204_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1204_cast_fp16")]; + string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; + tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; + tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; + int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(111196608)))]; + tensor var_1209_cast_fp16 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = var_1192_to_fp16, x = var_1188_cast_fp16_0)[name = string("op_1209_cast_fp16")]; + tensor x_69_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1209_cast_fp16)[name = string("x_69_cast_fp16")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; + tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1191_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; + int32 var_1222 = const()[name = string("op_1222"), val = int32(1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1225_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1225_cast_fp16")]; + bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; + tensor x_73_cast_fp16 = concat(axis = var_1222, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1225_cast_fp16))[name = string("x_73_cast_fp16")]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_1235_to_fp16 = const()[name = string("op_1235_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1235_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; + tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; + tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(1)]; + tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1241_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + 
tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; + tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1263_to_fp16, x = var_1241_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; + string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; + tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; + tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; + int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; + tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1274_to_fp16, x = var_1241_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; + tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1285_to_fp16, x = var_1241_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 16, 64, 16])]; + tensor embed_17_cast_fp16 = reshape(shape = var_1293, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 2, 64, 16])]; + tensor var_1298_cast_fp16 = reshape(shape = var_1297, x = key_states_17_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 2, 64, 16])]; + tensor var_1305_cast_fp16 = reshape(shape = var_1304, x = value_states_17_cast_fp16)[name = string("op_1305_cast_fp16")]; + tensor value_states_19_perm_0 = const()[name = 
string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1309_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310_split_sizes_0 = const()[name = string("op_1310_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1310_axis_0 = const()[name = string("op_1310_axis_0"), val = int32(-2)]; + tensor var_1310_cast_fp16_0, tensor var_1310_cast_fp16_1 = split(axis = var_1310_axis_0, split_sizes = var_1310_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1310_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1312_cast_fp16 = mul(x = var_1310_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1312_cast_fp16")]; + int32 var_1314 = const()[name = string("op_1314"), val = int32(-2)]; + bool var_1315_interleave_0 = const()[name = string("op_1315_interleave_0"), val = bool(false)]; + tensor var_1315_cast_fp16 = concat(axis = var_1314, interleave = var_1315_interleave_0, values = (var_1312_cast_fp16, var_1310_cast_fp16_0))[name = string("op_1315_cast_fp16")]; + tensor var_1316_cast_fp16 = mul(x = var_1315_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_1309_cast_fp16, y = var_1316_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1298_cast_fp16)[name = string("transpose_5")]; + tensor var_1319_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor var_1320_split_sizes_0 = const()[name = string("op_1320_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1320_axis_0 = const()[name = string("op_1320_axis_0"), val = int32(-1)]; + tensor var_1320_cast_fp16_0, tensor var_1320_cast_fp16_1 = split(axis = var_1320_axis_0, split_sizes = var_1320_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1320_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1322_cast_fp16 = mul(x = var_1320_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 var_1324 = const()[name = string("op_1324"), val = int32(-1)]; + bool var_1325_interleave_0 = const()[name = string("op_1325_interleave_0"), val = bool(false)]; + tensor var_1325_cast_fp16 = concat(axis = var_1324, interleave = var_1325_interleave_0, values = (var_1322_cast_fp16, var_1320_cast_fp16_0))[name = string("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = mul(x = var_1325_cast_fp16, y = sin_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor key_states_19_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1326_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; + tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, 
concat_35_values3_0))[name = string("concat_35")]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_246, concat_36_values3_0))[name = string("concat_36")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_18)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_44_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache)[name = string("coreml_update_state_44")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1305_cast_fp16)[name = string("transpose_4")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_19)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_45_write_state")]; + tensor coreml_update_state_21 = read_state(input = 
value_cache)[name = string("coreml_update_state_45")]; + tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1369_end_0 = const()[name = string("op_1369_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = coreml_update_state_20)[name = string("op_1369_cast_fp16")]; + tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; + int32 var_1372_axis_0 = const()[name = string("op_1372_axis_0"), val = int32(1)]; + tensor var_1372_cast_fp16_0, tensor var_1372_cast_fp16_1 = split(axis = var_1372_axis_0, split_sizes = tile_8, x = var_1369_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = string("op_1379_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = coreml_update_state_21)[name = string("op_1379_cast_fp16")]; + tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; + int32 var_1382_axis_0 = const()[name = string("op_1382_axis_0"), val = int32(1)]; + tensor var_1382_cast_fp16_0, tensor var_1382_cast_fp16_1 = split(axis = var_1382_axis_0, split_sizes = tile_9, x = var_1379_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1385_split_sizes_0 = const()[name = string("op_1385_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1385_axis_0 = const()[name = string("op_1385_axis_0"), val = int32(1)]; + tensor var_1385_cast_fp16_0, tensor var_1385_cast_fp16_1 = split(axis = var_1385_axis_0, split_sizes = var_1385_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1385_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1372_cast_fp16_0, y = var_1385_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; + fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + int32 var_1392 = const()[name = string("op_1392"), val = int32(2)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_1392, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool var_1398_transpose_x_1 = const()[name = string("op_1398_transpose_x_1"), val = bool(true)]; + bool var_1398_transpose_y_1 = const()[name = string("op_1398_transpose_y_1"), val = bool(false)]; + tensor var_1398_cast_fp16 = matmul(transpose_x = var_1398_transpose_x_1, transpose_y = 
var_1398_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1382_cast_fp16_0)[name = string("op_1398_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1372_cast_fp16_1, y = var_1385_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; + fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; + tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + int32 var_1404 = const()[name = string("op_1404"), val = int32(2)]; + tensor attn_weights_79_cast_fp16 = softmax(axis = var_1404, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; + bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; + bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1382_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; + int32 var_1412 = const()[name = string("op_1412"), val = int32(1)]; + bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; + tensor attn_output_27_cast_fp16 = concat(axis = var_1412, interleave = attn_output_27_interleave_0, values = (var_1398_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; + tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1024, 1, 16])]; + tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_3")]; + tensor x_77_cast_fp16 = reshape(shape = var_1421, x = var_1416_cast_fp16)[name = string("x_77_cast_fp16")]; + string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; + tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; + tensor var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; + tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1428_to_fp16, x = x_77_cast_fp16)[name = 
string("hidden_states_27_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(1)]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1443_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1443_cast_fp16")]; + bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; + tensor x_81_cast_fp16 = concat(axis = var_1440, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1443_cast_fp16))[name = string("x_81_cast_fp16")]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1453_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; + tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; + tensor var_1459_split_sizes_0 = const()[name = string("op_1459_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1459_axis_0 = const()[name = string("op_1459_axis_0"), val = int32(1)]; + tensor var_1459_cast_fp16_0, tensor var_1459_cast_fp16_1 = split(axis = var_1459_axis_0, split_sizes = var_1459_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1459_cast_fp16")]; + string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; + tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; + tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; + int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; + tensor var_1464_to_fp16 = const()[name = string("op_1464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; + tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1464_to_fp16, x = var_1459_cast_fp16_0)[name = string("input_9_cast_fp16")]; + tensor var_1475_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1475_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = 
var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = var_1463_to_fp16, x = var_1459_cast_fp16_0)[name = string("op_1480_cast_fp16")]; + tensor x_87_cast_fp16 = mul(x = var_1475_cast_fp16, y = var_1480_cast_fp16)[name = string("x_87_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor var_1462_to_fp16 = const()[name = string("op_1462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1462_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_1493 = const()[name = string("op_1493"), val = int32(1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1496_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1496_cast_fp16")]; + bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; + tensor x_91_cast_fp16 = concat(axis = var_1493, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1496_cast_fp16))[name = string("x_91_cast_fp16")]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1506_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; + tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; + tensor var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(1)]; + tensor var_1512_cast_fp16_0, tensor var_1512_cast_fp16_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1512_cast_fp16")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = 
tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor var_1534_to_fp16 = const()[name = string("op_1534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; + tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1534_to_fp16, x = var_1512_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; + tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1545_to_fp16, x = var_1512_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; + string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; + tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; + tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; + int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; + tensor var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; + tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1556_to_fp16, x = var_1512_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 16, 64, 16])]; + tensor embed_21_cast_fp16 = reshape(shape = var_1564, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; + tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 2, 64, 16])]; + tensor var_1569_cast_fp16 = reshape(shape = var_1568, x = key_states_21_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 2, 64, 16])]; + tensor var_1576_cast_fp16 = reshape(shape = var_1575, x = value_states_21_cast_fp16)[name = string("op_1576_cast_fp16")]; + tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1580_cast_fp16 = mul(x = 
embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1580_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-2)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1581_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1583_cast_fp16 = mul(x = var_1581_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1583_cast_fp16")]; + int32 var_1585 = const()[name = string("op_1585"), val = int32(-2)]; + bool var_1586_interleave_0 = const()[name = string("op_1586_interleave_0"), val = bool(false)]; + tensor var_1586_cast_fp16 = concat(axis = var_1585, interleave = var_1586_interleave_0, values = (var_1583_cast_fp16, var_1581_cast_fp16_0))[name = string("op_1586_cast_fp16")]; + tensor var_1587_cast_fp16 = mul(x = var_1586_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_1580_cast_fp16, y = var_1587_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_1569_cast_fp16)[name = string("transpose_2")]; + tensor var_1590_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor var_1591_split_sizes_0 = const()[name = string("op_1591_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1591_axis_0 = const()[name = string("op_1591_axis_0"), val = int32(-1)]; + tensor var_1591_cast_fp16_0, tensor var_1591_cast_fp16_1 = split(axis = var_1591_axis_0, split_sizes = var_1591_split_sizes_0, x = embed_cast_fp16)[name = string("op_1591_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1593_cast_fp16 = mul(x = var_1591_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + int32 var_1595 = const()[name = string("op_1595"), val = int32(-1)]; + bool var_1596_interleave_0 = const()[name = string("op_1596_interleave_0"), val = bool(false)]; + tensor var_1596_cast_fp16 = concat(axis = var_1595, interleave = var_1596_interleave_0, values = (var_1593_cast_fp16, var_1591_cast_fp16_0))[name = string("op_1596_cast_fp16")]; + tensor var_1597_cast_fp16 = mul(x = var_1596_cast_fp16, y = sin_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor key_states_cast_fp16 = add(x = var_1590_cast_fp16, y = var_1597_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; + tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = 
tensor([0])]; + tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_246, concat_44_values3_0))[name = string("concat_44")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_cast_fp16, x = coreml_update_state_20)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_46_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache)[name = string("coreml_update_state_46")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_1576_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_cast_fp16, x = coreml_update_state_21)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_47_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache)[name = string("coreml_update_state_47")]; + tensor var_1640_begin_0 = const()[name = string("op_1640_begin_0"), val = tensor([5, 0, 0, 
0])]; + tensor var_1640_end_0 = const()[name = string("op_1640_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1640_end_mask_0 = const()[name = string("op_1640_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = coreml_update_state_22)[name = string("op_1640_cast_fp16")]; + tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; + int32 var_1643_axis_0 = const()[name = string("op_1643_axis_0"), val = int32(1)]; + tensor var_1643_cast_fp16_0, tensor var_1643_cast_fp16_1 = split(axis = var_1643_axis_0, split_sizes = tile_10, x = var_1640_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = string("op_1650_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = coreml_update_state_23)[name = string("op_1650_cast_fp16")]; + tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; + int32 var_1653_axis_0 = const()[name = string("op_1653_axis_0"), val = int32(1)]; + tensor var_1653_cast_fp16_0, tensor var_1653_cast_fp16_1 = split(axis = var_1653_axis_0, split_sizes = tile_11, x = var_1650_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1656_split_sizes_0 = const()[name = string("op_1656_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1656_axis_0 = const()[name = string("op_1656_axis_0"), val = int32(1)]; + tensor var_1656_cast_fp16_0, tensor var_1656_cast_fp16_1 = split(axis = var_1656_axis_0, split_sizes = var_1656_split_sizes_0, x = query_states_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1643_cast_fp16_0, y = var_1656_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; + fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; + tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(2)]; + tensor attn_weights_87_cast_fp16 = softmax(axis = var_1663, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; + bool var_1669_transpose_x_1 = const()[name = string("op_1669_transpose_x_1"), val = bool(true)]; + bool var_1669_transpose_y_1 = const()[name = string("op_1669_transpose_y_1"), val = bool(false)]; + tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_1, transpose_y = var_1669_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1653_cast_fp16_0)[name = string("op_1669_cast_fp16")]; + bool attn_weights_89_transpose_x_0 = 
const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; + bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; + tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1643_cast_fp16_1, y = var_1656_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; + fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; + int32 var_1675 = const()[name = string("op_1675"), val = int32(2)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_1675, x = attn_weights_93_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; + bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_cast_fp16, y = var_1653_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; + int32 var_1683 = const()[name = string("op_1683"), val = int32(1)]; + bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; + tensor attn_output_33_cast_fp16 = concat(axis = var_1683, interleave = attn_output_33_interleave_0, values = (var_1669_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; + tensor var_1687_perm_0 = const()[name = string("op_1687_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 1024, 1, 16])]; + tensor var_1687_cast_fp16 = transpose(perm = var_1687_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_0")]; + tensor x_95_cast_fp16 = reshape(shape = var_1692, x = var_1687_cast_fp16)[name = string("x_95_cast_fp16")]; + string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; + tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; + tensor var_1699_to_fp16 = const()[name = string("op_1699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; + tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1699_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; + int32 var_1711 = const()[name = 
string("op_1711"), val = int32(1)]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1714_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1714_cast_fp16")]; + bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; + tensor x_99_cast_fp16 = concat(axis = var_1711, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1714_cast_fp16))[name = string("x_99_cast_fp16")]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1724_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; + tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; + tensor var_1730_split_sizes_0 = const()[name = string("op_1730_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1730_axis_0 = const()[name = string("op_1730_axis_0"), val = int32(1)]; + tensor var_1730_cast_fp16_0, tensor var_1730_cast_fp16_1 = split(axis = var_1730_axis_0, split_sizes = var_1730_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1730_cast_fp16")]; + string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; + tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; + tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; + int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; + tensor var_1735_to_fp16 = const()[name = string("op_1735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; + tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_1735_to_fp16, x = var_1730_cast_fp16_0)[name = string("input_cast_fp16")]; + tensor var_1746_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_1746_cast_fp16")]; + string var_1751_pad_type_0 = const()[name = string("op_1751_pad_type_0"), val = string("valid")]; + tensor var_1751_strides_0 = const()[name = string("op_1751_strides_0"), val = tensor([1, 1])]; + tensor var_1751_pad_0 = const()[name = string("op_1751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1751_dilations_0 = const()[name = string("op_1751_dilations_0"), val = tensor([1, 1])]; + int32 var_1751_groups_0 = const()[name = string("op_1751_groups_0"), val = int32(1)]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; + tensor var_1751_cast_fp16 = conv(dilations = var_1751_dilations_0, groups = var_1751_groups_0, pad = var_1751_pad_0, pad_type = var_1751_pad_type_0, strides = var_1751_strides_0, weight = var_1734_to_fp16, x = var_1730_cast_fp16_0)[name = string("op_1751_cast_fp16")]; + tensor x_105_cast_fp16 = mul(x 
= var_1746_cast_fp16, y = var_1751_cast_fp16)[name = string("x_105_cast_fp16")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; + tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_1733_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_107_cast_fp16")]; + int32 var_1764 = const()[name = string("op_1764"), val = int32(1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1767_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1767_cast_fp16")]; + bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; + tensor x_109_cast_fp16 = concat(axis = var_1764, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1767_cast_fp16))[name = string("x_109_cast_fp16")]; + tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; + fp16 var_1777_to_fp16 = const()[name = string("op_1777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1777_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; + tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; + tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_75_cast_fp16")]; + tensor var_1783_split_sizes_0 = const()[name = string("op_1783_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(1)]; + tensor output, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = var_1783_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1783_cast_fp16")]; + } -> (output); + func length_32(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { + tensor var_80 = const()[name = string("op_80"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188387968)))]; + tensor position_ids_1 = add(x = var_80, y = position_id)[name = string("position_ids_1")]; + int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; + bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 greater_equal_0_y_0 = 
const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; + tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; + tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; + int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; + bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; + int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; + tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; + tensor var_105 = const()[name = string("op_105"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; + tensor var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor([2])]; + tensor var_112 = expand_dims(axes = var_112_axes_0, x = position_ids_1)[name = string("op_112")]; + tensor var_113 = greater(x = var_105, y = var_112)[name = string("op_113")]; + tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; + string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_113_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_113)[name = string("cast_65")]; + tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_113_to_fp16)[name = string("attention_mask_3_cast_fp16")]; + fp16 var_121_promoted_to_fp16 = const()[name = string("op_121_promoted_to_fp16"), val = fp16(0x0p+0)]; + tensor var_122_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_121_promoted_to_fp16)[name = string("op_122_cast_fp16")]; + tensor var_123_after_broadcast_to_fp16 = const()[name = string("op_123_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188388160)))]; + tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_123_after_broadcast_to_fp16, cond = var_122_cast_fp16)[name = string("attention_mask_cast_fp16")]; + tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; + tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; + tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; + int32 var_138 = const()[name = string("op_138"), val = int32(1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_141_cast_fp16 = mul(x = inputs_embeds, 
y = const_4_promoted_to_fp16)[name = string("op_141_cast_fp16")]; + bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; + tensor x_1_cast_fp16 = concat(axis = var_138, interleave = x_1_interleave_0, values = (inputs_embeds, var_141_cast_fp16))[name = string("x_1_cast_fp16")]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_151_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; + tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; + tensor var_157_split_sizes_0 = const()[name = string("op_157_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_157_axis_0 = const()[name = string("op_157_axis_0"), val = int32(1)]; + tensor var_157_cast_fp16_0, tensor var_157_cast_fp16_1 = split(axis = var_157_axis_0, split_sizes = var_157_split_sizes_0, x = out_3_cast_fp16)[name = string("op_157_cast_fp16")]; + tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([32])]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; + tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_179_to_fp16, x = var_157_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; + tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_190_to_fp16, x = var_157_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = 
string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor var_201_to_fp16 = const()[name = string("op_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; + tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_201_to_fp16, x = var_157_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 16, 64, 32])]; + tensor embed_1_cast_fp16 = reshape(shape = var_209, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 2, 64, 32])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_states_1_cast_fp16)[name = string("op_214_cast_fp16")]; + tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_220 = const()[name = string("op_220"), val = tensor([1, 2, 64, 32])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = value_states_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_20")]; + tensor var_225_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226_split_sizes_0 = const()[name = string("op_226_split_sizes_0"), val = tensor([32, 32])]; + int32 var_226_axis_0 = const()[name = string("op_226_axis_0"), val = int32(-2)]; + tensor var_226_cast_fp16_0, tensor var_226_cast_fp16_1 = split(axis = var_226_axis_0, split_sizes = var_226_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_226_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_228_cast_fp16 = mul(x = var_226_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_228_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-2)]; + bool var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_230, interleave = var_231_interleave_0, values = (var_228_cast_fp16, var_226_cast_fp16_0))[name = string("op_231_cast_fp16")]; + tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_19")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_225_cast_fp16, y = var_232_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_214_cast_fp16)[name = string("transpose_17")]; + tensor var_235_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor var_236_split_sizes_0 = const()[name = 
string("op_236_split_sizes_0"), val = tensor([32, 32])]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(-1)]; + tensor var_236_cast_fp16_0, tensor var_236_cast_fp16_1 = split(axis = var_236_axis_0, split_sizes = var_236_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_236_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_238_cast_fp16 = mul(x = var_236_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_238_cast_fp16")]; + int32 var_240 = const()[name = string("op_240"), val = int32(-1)]; + bool var_241_interleave_0 = const()[name = string("op_241_interleave_0"), val = bool(false)]; + tensor var_241_cast_fp16 = concat(axis = var_240, interleave = var_241_interleave_0, values = (var_238_cast_fp16, var_236_cast_fp16_0))[name = string("op_241_cast_fp16")]; + tensor var_242_cast_fp16 = mul(x = var_241_cast_fp16, y = sin_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_235_cast_fp16, y = var_242_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_246 = add(x = position_id, y = q_len_1)[name = string("op_246")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; + tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; + tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_246, concat_4_values3_0))[name = string("concat_4")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = 
key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache)[name = string("coreml_update_state_48")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_221_cast_fp16)[name = string("transpose_16")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache)[name = string("coreml_update_state_49")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_0 = const()[name = string("op_285_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = coreml_update_state_12)[name = string("op_285_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; + int32 var_288_axis_0 = const()[name = string("op_288_axis_0"), val = int32(1)]; + tensor var_288_cast_fp16_0, tensor var_288_cast_fp16_1 = split(axis = var_288_axis_0, split_sizes = tile_0, x = var_285_cast_fp16)[name = string("op_288_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = coreml_update_state_13)[name = string("op_295_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; + int32 var_298_axis_0 = const()[name = 
string("op_298_axis_0"), val = int32(1)]; + tensor var_298_cast_fp16_0, tensor var_298_cast_fp16_1 = split(axis = var_298_axis_0, split_sizes = tile_1, x = var_295_cast_fp16)[name = string("op_298_cast_fp16")]; + tensor var_301_split_sizes_0 = const()[name = string("op_301_split_sizes_0"), val = tensor([8, 8])]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + tensor var_301_cast_fp16_0, tensor var_301_cast_fp16_1 = split(axis = var_301_axis_0, split_sizes = var_301_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_301_cast_fp16")]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_288_cast_fp16_0, y = var_301_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; + fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; + tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_18")]; + tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + int32 var_308 = const()[name = string("op_308"), val = int32(2)]; + tensor attn_weights_7_cast_fp16 = softmax(axis = var_308, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool var_314_transpose_x_1 = const()[name = string("op_314_transpose_x_1"), val = bool(true)]; + bool var_314_transpose_y_1 = const()[name = string("op_314_transpose_y_1"), val = bool(false)]; + tensor var_314_cast_fp16 = matmul(transpose_x = var_314_transpose_x_1, transpose_y = var_314_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_298_cast_fp16_0)[name = string("op_314_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_288_cast_fp16_1, y = var_301_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; + fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; + tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(2)]; + tensor attn_weights_15_cast_fp16 = softmax(axis = var_320, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; + bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = 
bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_298_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; + int32 var_328 = const()[name = string("op_328"), val = int32(1)]; + bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; + tensor attn_output_3_cast_fp16 = concat(axis = var_328, interleave = attn_output_3_interleave_0, values = (var_314_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; + tensor var_332_perm_0 = const()[name = string("op_332_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 1024, 1, 32])]; + tensor var_332_cast_fp16 = transpose(perm = var_332_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_15")]; + tensor x_5_cast_fp16 = reshape(shape = var_337, x = var_332_cast_fp16)[name = string("x_5_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_344_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; + int32 var_356 = const()[name = string("op_356"), val = int32(1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_359_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_359_cast_fp16")]; + bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; + tensor x_9_cast_fp16 = concat(axis = var_356, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_359_cast_fp16))[name = string("x_9_cast_fp16")]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; + tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; + tensor var_375_split_sizes_0 = const()[name = string("op_375_split_sizes_0"), val = tensor([1024, 1024])]; + int32 
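The recurring mul(x, -1) / concat(x, -x) / layer_norm / mul(weight) pattern above (x_9, out_7, out_9 here, and the same shape at every other norm site) is an RMSNorm expressed through LayerNorm: concatenating the negated activations forces the mean of the doubled channel axis to zero, so LayerNorm's variance reduces to mean(x**2) and the first half of the output is x / sqrt(mean(x**2) + eps). A short NumPy sketch; the epsilon is assumed here as 1e-5, close to the fp16 constant 0x1.5p-17, and the axis is the last one rather than the MIL's channel axis:

import numpy as np

def rmsnorm_via_layernorm(x, weight, eps=1e-5):
    """LayerNorm over concat(x, -x) collapses to RMSNorm on x; the first
    half of the normalized result is then scaled by the per-channel weight."""
    doubled = np.concatenate([x, -x], axis=-1)
    mu = doubled.mean(axis=-1, keepdims=True)                 # exactly zero by construction
    var = ((doubled - mu) ** 2).mean(axis=-1, keepdims=True)  # equals mean(x**2)
    normed = (doubled - mu) / np.sqrt(var + eps)
    return normed[..., : x.shape[-1]] * weight

x = np.random.randn(4, 1024).astype(np.float32)
w = np.ones(1024, dtype=np.float32)
ref = x / np.sqrt((x ** 2).mean(-1, keepdims=True) + 1e-5)    # plain RMSNorm for comparison
assert np.allclose(rmsnorm_via_layernorm(x, w), ref, atol=1e-5)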
var_375_axis_0 = const()[name = string("op_375_axis_0"), val = int32(1)]; + tensor var_375_cast_fp16_0, tensor var_375_cast_fp16_1 = split(axis = var_375_axis_0, split_sizes = var_375_split_sizes_0, x = out_9_cast_fp16)[name = string("op_375_cast_fp16")]; + string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; + tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; + tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; + int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; + tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_380_to_fp16, x = var_375_cast_fp16_0)[name = string("input_1_cast_fp16")]; + tensor var_391_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_391_cast_fp16")]; + string var_396_pad_type_0 = const()[name = string("op_396_pad_type_0"), val = string("valid")]; + tensor var_396_strides_0 = const()[name = string("op_396_strides_0"), val = tensor([1, 1])]; + tensor var_396_pad_0 = const()[name = string("op_396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_396_dilations_0 = const()[name = string("op_396_dilations_0"), val = tensor([1, 1])]; + int32 var_396_groups_0 = const()[name = string("op_396_groups_0"), val = int32(1)]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; + tensor var_396_cast_fp16 = conv(dilations = var_396_dilations_0, groups = var_396_groups_0, pad = var_396_pad_0, pad_type = var_396_pad_type_0, strides = var_396_strides_0, weight = var_379_to_fp16, x = var_375_cast_fp16_0)[name = string("op_396_cast_fp16")]; + tensor x_15_cast_fp16 = mul(x = var_391_cast_fp16, y = var_396_cast_fp16)[name = string("x_15_cast_fp16")]; + string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; + tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; + tensor var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; + tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_378_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; + int32 var_409 = const()[name = string("op_409"), val = int32(1)]; + fp16 const_14_promoted_to_fp16 = const()[name = 
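The conv / silu / conv / mul / conv block above (input_1, var_391, var_396, x_15, hidden_states_5) is a gated MLP of the SwiGLU kind: a gate projection passed through SiLU, an up projection, their elementwise product, and a down projection added back to the residual stream (x_17 = x_7 + hidden_states_5). A sketch with plain matmuls in place of the 1x1 convolutions; the hidden size 4096 is an assumption used only for illustration:

import numpy as np

def silu(x):
    return x / (1.0 + np.exp(-x))

def swiglu_mlp(x, w_gate, w_up, w_down):
    """Gated MLP sketch: silu(x @ w_gate) * (x @ w_up) followed by a down
    projection. Weight shapes assumed: w_gate/w_up (d_model, d_ff),
    w_down (d_ff, d_model)."""
    gate = silu(x @ w_gate)
    up = x @ w_up
    return (gate * up) @ w_down

d_model, d_ff = 1024, 4096   # hypothetical sizes
x = np.random.randn(1, d_model).astype(np.float32)
h = swiglu_mlp(x, np.random.randn(d_model, d_ff), np.random.randn(d_model, d_ff),
               np.random.randn(d_ff, d_model))
out = x + h                  # residual connection, as in the add that produces x_17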
string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_412_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_412_cast_fp16")]; + bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; + tensor x_19_cast_fp16 = concat(axis = var_409, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_412_cast_fp16))[name = string("x_19_cast_fp16")]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_422_to_fp16 = const()[name = string("op_422_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_422_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; + tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; + tensor var_428_split_sizes_0 = const()[name = string("op_428_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_428_axis_0 = const()[name = string("op_428_axis_0"), val = int32(1)]; + tensor var_428_cast_fp16_0, tensor var_428_cast_fp16_1 = split(axis = var_428_axis_0, split_sizes = var_428_split_sizes_0, x = out_15_cast_fp16)[name = string("op_428_cast_fp16")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; + tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_450_to_fp16, x = var_428_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; + string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; + tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; + tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; + int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; + tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_461_to_fp16, x = var_428_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; + string value_states_5_pad_type_0 = 
const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; + tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; + tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; + int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; + tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_472_to_fp16, x = var_428_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 16, 64, 32])]; + tensor embed_5_cast_fp16 = reshape(shape = var_480, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 2, 64, 32])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = key_states_5_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_491 = const()[name = string("op_491"), val = tensor([1, 2, 64, 32])]; + tensor var_492_cast_fp16 = reshape(shape = var_491, x = value_states_5_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_496_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor var_497_split_sizes_0 = const()[name = string("op_497_split_sizes_0"), val = tensor([32, 32])]; + int32 var_497_axis_0 = const()[name = string("op_497_axis_0"), val = int32(-2)]; + tensor var_497_cast_fp16_0, tensor var_497_cast_fp16_1 = split(axis = var_497_axis_0, split_sizes = var_497_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_497_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_499_cast_fp16")]; + int32 var_501 = const()[name = string("op_501"), val = int32(-2)]; + bool var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; + tensor var_502_cast_fp16 = concat(axis = var_501, interleave = var_502_interleave_0, values = (var_499_cast_fp16, var_497_cast_fp16_0))[name = string("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = sin_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_496_cast_fp16, y = var_503_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_485_cast_fp16)[name = string("transpose_14")]; + tensor var_506_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor([32, 32])]; + int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; + tensor var_507_cast_fp16_0, 
tensor var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_507_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_509_cast_fp16")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + bool var_512_interleave_0 = const()[name = string("op_512_interleave_0"), val = bool(false)]; + tensor var_512_cast_fp16 = concat(axis = var_511, interleave = var_512_interleave_0, values = (var_509_cast_fp16, var_507_cast_fp16_0))[name = string("op_512_cast_fp16")]; + tensor var_513_cast_fp16 = mul(x = var_512_cast_fp16, y = sin_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor key_states_7_cast_fp16 = add(x = var_506_cast_fp16, y = var_513_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_246, concat_12_values3_0))[name = string("concat_12")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_12)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = 
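The mul / split / negate / concat / mul / add sequence applied to embed_5 and embed_7 above is the rotary position embedding in rotate-half form: x * cos + rotate_half(x) * sin, with cos and sin gathered from the precomputed tables at the current positions. A NumPy sketch; the (heads, seq, head_dim) layout and the base of 10000 for the frequency table are assumptions made for illustration, since the model itself reads the tables from the weight blob and keeps the queries in a head-dim-first layout:

import numpy as np

def rotate_half(x):
    """Split the head dimension in two, negate the second half and swap,
    matching the split / mul(-1) / concat steps above."""
    x1, x2 = np.split(x, 2, axis=-1)
    return np.concatenate([-x2, x1], axis=-1)

def apply_rope(x, cos, sin):
    """x (heads, seq, head_dim), cos/sin (seq, head_dim) gathered at the
    current positions: returns x * cos + rotate_half(x) * sin."""
    return x * cos + rotate_half(x) * sin

head_dim, seq = 64, 32
pos = np.arange(seq)[:, None]
inv_freq = 1.0 / (10000 ** (np.arange(0, head_dim, 2) / head_dim))  # assumed base
angles = pos * inv_freq                                             # (seq, head_dim // 2)
cos = np.cos(np.concatenate([angles, angles], axis=-1))
sin = np.sin(np.concatenate([angles, angles], axis=-1))
q = np.random.randn(16, seq, head_dim).astype(np.float32)
q_rot = apply_rope(q, cos, sin)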
string("coreml_update_state_50_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache)[name = string("coreml_update_state_50")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_492_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_13)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_51_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache)[name = string("coreml_update_state_51")]; + tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_14)[name = string("op_556_cast_fp16")]; + tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; + int32 var_559_axis_0 = const()[name = string("op_559_axis_0"), val = int32(1)]; + tensor var_559_cast_fp16_0, tensor var_559_cast_fp16_1 = split(axis = var_559_axis_0, split_sizes = tile_2, x = var_556_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_566_begin_0 = const()[name = string("op_566_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_566_end_0 = const()[name = string("op_566_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_566_end_mask_0 = const()[name = string("op_566_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = coreml_update_state_15)[name = string("op_566_cast_fp16")]; + tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; + int32 var_569_axis_0 = const()[name = string("op_569_axis_0"), val = int32(1)]; + tensor var_569_cast_fp16_0, tensor var_569_cast_fp16_1 = split(axis = var_569_axis_0, split_sizes = tile_3, x = var_566_cast_fp16)[name = string("op_569_cast_fp16")]; + tensor var_572_split_sizes_0 = const()[name = string("op_572_split_sizes_0"), val = tensor([8, 8])]; + int32 var_572_axis_0 = const()[name = string("op_572_axis_0"), val = int32(1)]; + tensor 
var_572_cast_fp16_0, tensor var_572_cast_fp16_1 = split(axis = var_572_axis_0, split_sizes = var_572_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_572_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_559_cast_fp16_0, y = var_572_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; + fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + int32 var_579 = const()[name = string("op_579"), val = int32(2)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_579, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool var_585_transpose_x_1 = const()[name = string("op_585_transpose_x_1"), val = bool(true)]; + bool var_585_transpose_y_1 = const()[name = string("op_585_transpose_y_1"), val = bool(false)]; + tensor var_585_cast_fp16 = matmul(transpose_x = var_585_transpose_x_1, transpose_y = var_585_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_569_cast_fp16_0)[name = string("op_585_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_559_cast_fp16_1, y = var_572_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; + fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; + tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + int32 var_591 = const()[name = string("op_591"), val = int32(2)]; + tensor attn_weights_31_cast_fp16 = softmax(axis = var_591, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; + bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_569_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; + int32 var_599 = const()[name = string("op_599"), val = int32(1)]; + bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; + tensor attn_output_9_cast_fp16 = concat(axis = var_599, interleave = 
attn_output_9_interleave_0, values = (var_585_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; + tensor var_603_perm_0 = const()[name = string("op_603_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_608 = const()[name = string("op_608"), val = tensor([1, 1024, 1, 32])]; + tensor var_603_cast_fp16 = transpose(perm = var_603_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_12")]; + tensor x_23_cast_fp16 = reshape(shape = var_608, x = var_603_cast_fp16)[name = string("x_23_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_615_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_627 = const()[name = string("op_627"), val = int32(1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_630_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_630_cast_fp16")]; + bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; + tensor x_27_cast_fp16 = concat(axis = var_627, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_630_cast_fp16))[name = string("x_27_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_640_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; + tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; + tensor var_646_split_sizes_0 = const()[name = string("op_646_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_646_axis_0 = const()[name = string("op_646_axis_0"), val = int32(1)]; + tensor var_646_cast_fp16_0, tensor var_646_cast_fp16_1 = split(axis = var_646_axis_0, split_sizes = var_646_split_sizes_0, x = out_21_cast_fp16)[name = string("op_646_cast_fp16")]; + string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; + tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; + tensor input_3_pad_0 = 
const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; + int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; + tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_651_to_fp16, x = var_646_cast_fp16_0)[name = string("input_3_cast_fp16")]; + tensor var_662_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_662_cast_fp16")]; + string var_667_pad_type_0 = const()[name = string("op_667_pad_type_0"), val = string("valid")]; + tensor var_667_strides_0 = const()[name = string("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = string("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = string("op_667_dilations_0"), val = tensor([1, 1])]; + int32 var_667_groups_0 = const()[name = string("op_667_groups_0"), val = int32(1)]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = var_650_to_fp16, x = var_646_cast_fp16_0)[name = string("op_667_cast_fp16")]; + tensor x_33_cast_fp16 = mul(x = var_662_cast_fp16, y = var_667_cast_fp16)[name = string("x_33_cast_fp16")]; + string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; + tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; + tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_649_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; + int32 var_680 = const()[name = string("op_680"), val = int32(1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_683_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_683_cast_fp16")]; + bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; + tensor x_37_cast_fp16 = concat(axis = var_680, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_683_cast_fp16))[name = string("x_37_cast_fp16")]; + tensor out_25_axes_0 = const()[name = 
string("out_25_axes_0"), val = tensor([1])]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_693_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; + tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; + tensor var_699_split_sizes_0 = const()[name = string("op_699_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_699_axis_0 = const()[name = string("op_699_axis_0"), val = int32(1)]; + tensor var_699_cast_fp16_0, tensor var_699_cast_fp16_1 = split(axis = var_699_axis_0, split_sizes = var_699_split_sizes_0, x = out_27_cast_fp16)[name = string("op_699_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; + tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_721_to_fp16, x = var_699_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; + string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; + tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; + tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; + int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; + tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_732_to_fp16, x = var_699_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = 
string("value_states_9_groups_0"), val = int32(1)]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; + tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_743_to_fp16, x = var_699_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([1, 16, 64, 32])]; + tensor embed_9_cast_fp16 = reshape(shape = var_751, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([1, 2, 64, 32])]; + tensor var_756_cast_fp16 = reshape(shape = var_755, x = key_states_9_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, 2, 64, 32])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = value_states_9_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_767_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_767_cast_fp16")]; + tensor var_768_split_sizes_0 = const()[name = string("op_768_split_sizes_0"), val = tensor([32, 32])]; + int32 var_768_axis_0 = const()[name = string("op_768_axis_0"), val = int32(-2)]; + tensor var_768_cast_fp16_0, tensor var_768_cast_fp16_1 = split(axis = var_768_axis_0, split_sizes = var_768_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_768_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_770_cast_fp16 = mul(x = var_768_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_770_cast_fp16")]; + int32 var_772 = const()[name = string("op_772"), val = int32(-2)]; + bool var_773_interleave_0 = const()[name = string("op_773_interleave_0"), val = bool(false)]; + tensor var_773_cast_fp16 = concat(axis = var_772, interleave = var_773_interleave_0, values = (var_770_cast_fp16, var_768_cast_fp16_0))[name = string("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = mul(x = var_773_cast_fp16, y = sin_1_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_767_cast_fp16, y = var_774_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_756_cast_fp16)[name = string("transpose_11")]; + tensor var_777_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_778_split_sizes_0 = const()[name = string("op_778_split_sizes_0"), val = tensor([32, 32])]; + int32 var_778_axis_0 = const()[name = string("op_778_axis_0"), val = int32(-1)]; + tensor var_778_cast_fp16_0, tensor var_778_cast_fp16_1 = split(axis = var_778_axis_0, split_sizes = var_778_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_778_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_780_cast_fp16 = mul(x = var_778_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_780_cast_fp16")]; + int32 var_782 = const()[name = string("op_782"), val = 
int32(-1)]; + bool var_783_interleave_0 = const()[name = string("op_783_interleave_0"), val = bool(false)]; + tensor var_783_cast_fp16 = concat(axis = var_782, interleave = var_783_interleave_0, values = (var_780_cast_fp16, var_778_cast_fp16_0))[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_783_cast_fp16, y = sin_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor key_states_11_cast_fp16 = add(x = var_777_cast_fp16, y = var_784_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; + tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; + tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_246, concat_20_values3_0))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_14)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache)[name = string("coreml_update_state_52")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = 
tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_763_cast_fp16)[name = string("transpose_10")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_15)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache)[name = string("coreml_update_state_53")]; + tensor var_827_begin_0 = const()[name = string("op_827_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_827_end_0 = const()[name = string("op_827_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_827_end_mask_0 = const()[name = string("op_827_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_827_cast_fp16 = slice_by_index(begin = var_827_begin_0, end = var_827_end_0, end_mask = var_827_end_mask_0, x = coreml_update_state_16)[name = string("op_827_cast_fp16")]; + tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; + int32 var_830_axis_0 = const()[name = string("op_830_axis_0"), val = int32(1)]; + tensor var_830_cast_fp16_0, tensor var_830_cast_fp16_1 = split(axis = var_830_axis_0, split_sizes = tile_4, x = var_827_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = coreml_update_state_17)[name = string("op_837_cast_fp16")]; + tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; + int32 var_840_axis_0 = const()[name = string("op_840_axis_0"), val = int32(1)]; + tensor var_840_cast_fp16_0, tensor var_840_cast_fp16_1 = split(axis = var_840_axis_0, split_sizes = tile_5, x = var_837_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_843_split_sizes_0 = const()[name = string("op_843_split_sizes_0"), val = tensor([8, 8])]; + int32 var_843_axis_0 = const()[name = string("op_843_axis_0"), val = int32(1)]; + tensor var_843_cast_fp16_0, tensor var_843_cast_fp16_1 = split(axis = var_843_axis_0, split_sizes = var_843_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_843_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = 
matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_830_cast_fp16_0, y = var_843_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; + fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; + tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(2)]; + tensor attn_weights_39_cast_fp16 = softmax(axis = var_850, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; + bool var_856_transpose_x_1 = const()[name = string("op_856_transpose_x_1"), val = bool(true)]; + bool var_856_transpose_y_1 = const()[name = string("op_856_transpose_y_1"), val = bool(false)]; + tensor var_856_cast_fp16 = matmul(transpose_x = var_856_transpose_x_1, transpose_y = var_856_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_840_cast_fp16_0)[name = string("op_856_cast_fp16")]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_830_cast_fp16_1, y = var_843_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; + fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + int32 var_862 = const()[name = string("op_862"), val = int32(2)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_862, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; + bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_840_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; + int32 var_870 = const()[name = string("op_870"), val = int32(1)]; + bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; + tensor attn_output_15_cast_fp16 = concat(axis = var_870, interleave = attn_output_15_interleave_0, values = (var_856_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; + tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 1024, 1, 32])]; + tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_9")]; + tensor x_41_cast_fp16 = 
reshape(shape = var_879, x = var_874_cast_fp16)[name = string("x_41_cast_fp16")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; + tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_886_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_898 = const()[name = string("op_898"), val = int32(1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_901_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_901_cast_fp16")]; + bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; + tensor x_45_cast_fp16 = concat(axis = var_898, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_901_cast_fp16))[name = string("x_45_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_911_to_fp16 = const()[name = string("op_911_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_911_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; + tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_917_axis_0 = const()[name = string("op_917_axis_0"), val = int32(1)]; + tensor var_917_cast_fp16_0, tensor var_917_cast_fp16_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = out_33_cast_fp16)[name = string("op_917_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; + tensor 
input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_922_to_fp16, x = var_917_cast_fp16_0)[name = string("input_5_cast_fp16")]; + tensor var_933_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_933_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = var_921_to_fp16, x = var_917_cast_fp16_0)[name = string("op_938_cast_fp16")]; + tensor x_51_cast_fp16 = mul(x = var_933_cast_fp16, y = var_938_cast_fp16)[name = string("x_51_cast_fp16")]; + string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; + tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; + tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; + tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_920_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_951 = const()[name = string("op_951"), val = int32(1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_954_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_954_cast_fp16")]; + bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; + tensor x_55_cast_fp16 = concat(axis = var_951, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_954_cast_fp16))[name = string("x_55_cast_fp16")]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_964_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path 
= string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; + tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; + tensor var_970_split_sizes_0 = const()[name = string("op_970_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_970_axis_0 = const()[name = string("op_970_axis_0"), val = int32(1)]; + tensor var_970_cast_fp16_0, tensor var_970_cast_fp16_1 = split(axis = var_970_axis_0, split_sizes = var_970_split_sizes_0, x = out_39_cast_fp16)[name = string("op_970_cast_fp16")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; + tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_992_to_fp16, x = var_970_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; + tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1003_to_fp16, x = var_970_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor var_1014_to_fp16 = const()[name = string("op_1014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; + tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = 
value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1014_to_fp16, x = var_970_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, 16, 64, 32])]; + tensor embed_13_cast_fp16 = reshape(shape = var_1022, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 2, 64, 32])]; + tensor var_1027_cast_fp16 = reshape(shape = var_1026, x = key_states_13_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 2, 64, 32])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = value_states_13_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1038_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_split_sizes_0 = const()[name = string("op_1039_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1039_axis_0 = const()[name = string("op_1039_axis_0"), val = int32(-2)]; + tensor var_1039_cast_fp16_0, tensor var_1039_cast_fp16_1 = split(axis = var_1039_axis_0, split_sizes = var_1039_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1039_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1041_cast_fp16 = mul(x = var_1039_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1041_cast_fp16")]; + int32 var_1043 = const()[name = string("op_1043"), val = int32(-2)]; + bool var_1044_interleave_0 = const()[name = string("op_1044_interleave_0"), val = bool(false)]; + tensor var_1044_cast_fp16 = concat(axis = var_1043, interleave = var_1044_interleave_0, values = (var_1041_cast_fp16, var_1039_cast_fp16_0))[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = mul(x = var_1044_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1045_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1027_cast_fp16)[name = string("transpose_8")]; + tensor var_1048_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1049_split_sizes_0 = const()[name = string("op_1049_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1049_axis_0 = const()[name = string("op_1049_axis_0"), val = int32(-1)]; + tensor var_1049_cast_fp16_0, tensor var_1049_cast_fp16_1 = split(axis = var_1049_axis_0, split_sizes = var_1049_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1049_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1051_cast_fp16 = mul(x = var_1049_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1051_cast_fp16")]; + int32 var_1053 = const()[name = string("op_1053"), val = int32(-1)]; + bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; + tensor var_1054_cast_fp16 = concat(axis = var_1053, interleave = var_1054_interleave_0, values = (var_1051_cast_fp16, var_1049_cast_fp16_0))[name = string("op_1054_cast_fp16")]; + tensor 
var_1055_cast_fp16 = mul(x = var_1054_cast_fp16, y = sin_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor key_states_15_cast_fp16 = add(x = var_1048_cast_fp16, y = var_1055_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; + tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; + tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_246, concat_28_values3_0))[name = string("concat_28")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_16)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache)[name = string("coreml_update_state_54")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = 
string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1034_cast_fp16)[name = string("transpose_7")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_17)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache)[name = string("coreml_update_state_55")]; + tensor var_1098_begin_0 = const()[name = string("op_1098_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = string("op_1098_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1098_end_mask_0 = const()[name = string("op_1098_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = coreml_update_state_18)[name = string("op_1098_cast_fp16")]; + tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(1)]; + tensor var_1101_cast_fp16_0, tensor var_1101_cast_fp16_1 = split(axis = var_1101_axis_0, split_sizes = tile_6, x = var_1098_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = coreml_update_state_19)[name = string("op_1108_cast_fp16")]; + tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(1)]; + tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = tile_7, x = var_1108_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor var_1114_split_sizes_0 = const()[name = string("op_1114_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1114_axis_0 = const()[name = string("op_1114_axis_0"), val = int32(1)]; + tensor var_1114_cast_fp16_0, tensor var_1114_cast_fp16_1 = split(axis = var_1114_axis_0, split_sizes = var_1114_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1101_cast_fp16_0, y = var_1114_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; + fp16 _inversed_attn_weights_51_y_0_to_fp16 = 
const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; + tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(2)]; + tensor attn_weights_55_cast_fp16 = softmax(axis = var_1121, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; + bool var_1127_transpose_x_1 = const()[name = string("op_1127_transpose_x_1"), val = bool(true)]; + bool var_1127_transpose_y_1 = const()[name = string("op_1127_transpose_y_1"), val = bool(false)]; + tensor var_1127_cast_fp16 = matmul(transpose_x = var_1127_transpose_x_1, transpose_y = var_1127_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1111_cast_fp16_0)[name = string("op_1127_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1101_cast_fp16_1, y = var_1114_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; + fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; + tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + int32 var_1133 = const()[name = string("op_1133"), val = int32(2)]; + tensor attn_weights_63_cast_fp16 = softmax(axis = var_1133, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; + bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; + bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1111_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; + int32 var_1141 = const()[name = string("op_1141"), val = int32(1)]; + bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = concat(axis = var_1141, interleave = attn_output_21_interleave_0, values = (var_1127_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 1024, 1, 32])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_6")]; + tensor x_59_cast_fp16 = reshape(shape = var_1150, x = var_1145_cast_fp16)[name = string("x_59_cast_fp16")]; + string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; + tensor 
hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; + tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1157_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; + int32 var_1169 = const()[name = string("op_1169"), val = int32(1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1172_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1172_cast_fp16")]; + bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; + tensor x_63_cast_fp16 = concat(axis = var_1169, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1172_cast_fp16))[name = string("x_63_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_1182_to_fp16 = const()[name = string("op_1182_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1182_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; + tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; + tensor var_1188_split_sizes_0 = const()[name = string("op_1188_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1188_axis_0 = const()[name = string("op_1188_axis_0"), val = int32(1)]; + tensor var_1188_cast_fp16_0, tensor var_1188_cast_fp16_1 = split(axis = var_1188_axis_0, split_sizes = var_1188_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1188_cast_fp16")]; + string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; + tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; + int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; + tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = 
var_1193_to_fp16, x = var_1188_cast_fp16_0)[name = string("input_7_cast_fp16")]; + tensor var_1204_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1204_cast_fp16")]; + string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; + tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; + tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; + int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; + tensor var_1209_cast_fp16 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = var_1192_to_fp16, x = var_1188_cast_fp16_0)[name = string("op_1209_cast_fp16")]; + tensor x_69_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1209_cast_fp16)[name = string("x_69_cast_fp16")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; + tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1191_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; + int32 var_1222 = const()[name = string("op_1222"), val = int32(1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1225_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1225_cast_fp16")]; + bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; + tensor x_73_cast_fp16 = concat(axis = var_1222, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1225_cast_fp16))[name = string("x_73_cast_fp16")]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_1235_to_fp16 = const()[name = string("op_1235_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1235_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; + tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = 
layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; + tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(1)]; + tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1241_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; + tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1263_to_fp16, x = var_1241_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; + string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; + tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; + tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; + int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; + tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1274_to_fp16, x = var_1241_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; + tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1285_to_fp16, x = var_1241_cast_fp16_0)[name = 
string("value_states_17_cast_fp16")]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 16, 64, 32])]; + tensor embed_17_cast_fp16 = reshape(shape = var_1293, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 2, 64, 32])]; + tensor var_1298_cast_fp16 = reshape(shape = var_1297, x = key_states_17_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 2, 64, 32])]; + tensor var_1305_cast_fp16 = reshape(shape = var_1304, x = value_states_17_cast_fp16)[name = string("op_1305_cast_fp16")]; + tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1309_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310_split_sizes_0 = const()[name = string("op_1310_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1310_axis_0 = const()[name = string("op_1310_axis_0"), val = int32(-2)]; + tensor var_1310_cast_fp16_0, tensor var_1310_cast_fp16_1 = split(axis = var_1310_axis_0, split_sizes = var_1310_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1310_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1312_cast_fp16 = mul(x = var_1310_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1312_cast_fp16")]; + int32 var_1314 = const()[name = string("op_1314"), val = int32(-2)]; + bool var_1315_interleave_0 = const()[name = string("op_1315_interleave_0"), val = bool(false)]; + tensor var_1315_cast_fp16 = concat(axis = var_1314, interleave = var_1315_interleave_0, values = (var_1312_cast_fp16, var_1310_cast_fp16_0))[name = string("op_1315_cast_fp16")]; + tensor var_1316_cast_fp16 = mul(x = var_1315_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_1309_cast_fp16, y = var_1316_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1298_cast_fp16)[name = string("transpose_5")]; + tensor var_1319_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor var_1320_split_sizes_0 = const()[name = string("op_1320_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1320_axis_0 = const()[name = string("op_1320_axis_0"), val = int32(-1)]; + tensor var_1320_cast_fp16_0, tensor var_1320_cast_fp16_1 = split(axis = var_1320_axis_0, split_sizes = var_1320_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1320_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1322_cast_fp16 = mul(x = var_1320_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 var_1324 = const()[name = string("op_1324"), val = int32(-1)]; + bool var_1325_interleave_0 = const()[name = string("op_1325_interleave_0"), val = bool(false)]; + tensor var_1325_cast_fp16 = concat(axis = var_1324, interleave = var_1325_interleave_0, values = (var_1322_cast_fp16, var_1320_cast_fp16_0))[name = string("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = mul(x = var_1325_cast_fp16, y = sin_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor 
key_states_19_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1326_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; + tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_246, concat_36_values3_0))[name = string("concat_36")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_18)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache)[name = string("coreml_update_state_56")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor 
value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1305_cast_fp16)[name = string("transpose_4")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_19)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache)[name = string("coreml_update_state_57")]; + tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1369_end_0 = const()[name = string("op_1369_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = coreml_update_state_20)[name = string("op_1369_cast_fp16")]; + tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; + int32 var_1372_axis_0 = const()[name = string("op_1372_axis_0"), val = int32(1)]; + tensor var_1372_cast_fp16_0, tensor var_1372_cast_fp16_1 = split(axis = var_1372_axis_0, split_sizes = tile_8, x = var_1369_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = string("op_1379_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = coreml_update_state_21)[name = string("op_1379_cast_fp16")]; + tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; + int32 var_1382_axis_0 = const()[name = string("op_1382_axis_0"), val = int32(1)]; + tensor var_1382_cast_fp16_0, tensor var_1382_cast_fp16_1 = split(axis = var_1382_axis_0, split_sizes = tile_9, x = var_1379_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1385_split_sizes_0 = const()[name = string("op_1385_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1385_axis_0 = const()[name = string("op_1385_axis_0"), val = int32(1)]; + tensor var_1385_cast_fp16_0, tensor var_1385_cast_fp16_1 = split(axis = var_1385_axis_0, split_sizes = var_1385_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1385_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1372_cast_fp16_0, y = var_1385_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; + fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor 
_inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + int32 var_1392 = const()[name = string("op_1392"), val = int32(2)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_1392, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool var_1398_transpose_x_1 = const()[name = string("op_1398_transpose_x_1"), val = bool(true)]; + bool var_1398_transpose_y_1 = const()[name = string("op_1398_transpose_y_1"), val = bool(false)]; + tensor var_1398_cast_fp16 = matmul(transpose_x = var_1398_transpose_x_1, transpose_y = var_1398_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1382_cast_fp16_0)[name = string("op_1398_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1372_cast_fp16_1, y = var_1385_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; + fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; + tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + int32 var_1404 = const()[name = string("op_1404"), val = int32(2)]; + tensor attn_weights_79_cast_fp16 = softmax(axis = var_1404, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; + bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; + bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1382_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; + int32 var_1412 = const()[name = string("op_1412"), val = int32(1)]; + bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; + tensor attn_output_27_cast_fp16 = concat(axis = var_1412, interleave = attn_output_27_interleave_0, values = (var_1398_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; + tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1024, 1, 32])]; + tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_3")]; + tensor x_77_cast_fp16 = reshape(shape = var_1421, x = var_1416_cast_fp16)[name = string("x_77_cast_fp16")]; + string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; + tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 
1])]; + tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; + tensor var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; + tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1428_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(1)]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1443_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1443_cast_fp16")]; + bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; + tensor x_81_cast_fp16 = concat(axis = var_1440, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1443_cast_fp16))[name = string("x_81_cast_fp16")]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1453_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; + tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; + tensor var_1459_split_sizes_0 = const()[name = string("op_1459_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1459_axis_0 = const()[name = string("op_1459_axis_0"), val = int32(1)]; + tensor var_1459_cast_fp16_0, tensor var_1459_cast_fp16_1 = split(axis = var_1459_axis_0, split_sizes = var_1459_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1459_cast_fp16")]; + string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; + tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; + tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; + int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; + tensor var_1464_to_fp16 = const()[name = string("op_1464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; + tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1464_to_fp16, x = var_1459_cast_fp16_0)[name = string("input_9_cast_fp16")]; + tensor var_1475_cast_fp16 = 
silu(x = input_9_cast_fp16)[name = string("op_1475_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = var_1463_to_fp16, x = var_1459_cast_fp16_0)[name = string("op_1480_cast_fp16")]; + tensor x_87_cast_fp16 = mul(x = var_1475_cast_fp16, y = var_1480_cast_fp16)[name = string("x_87_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor var_1462_to_fp16 = const()[name = string("op_1462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1462_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_1493 = const()[name = string("op_1493"), val = int32(1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1496_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1496_cast_fp16")]; + bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; + tensor x_91_cast_fp16 = concat(axis = var_1493, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1496_cast_fp16))[name = string("x_91_cast_fp16")]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1506_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; + tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; + tensor 
var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(1)]; + tensor var_1512_cast_fp16_0, tensor var_1512_cast_fp16_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1512_cast_fp16")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor var_1534_to_fp16 = const()[name = string("op_1534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; + tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1534_to_fp16, x = var_1512_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; + tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1545_to_fp16, x = var_1512_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; + string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; + tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; + tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; + int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; + tensor var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; + tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1556_to_fp16, x = var_1512_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = 
tensor([1, 16, 64, 32])]; + tensor embed_21_cast_fp16 = reshape(shape = var_1564, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; + tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 2, 64, 32])]; + tensor var_1569_cast_fp16 = reshape(shape = var_1568, x = key_states_21_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 2, 64, 32])]; + tensor var_1576_cast_fp16 = reshape(shape = var_1575, x = value_states_21_cast_fp16)[name = string("op_1576_cast_fp16")]; + tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1580_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1580_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-2)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1581_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1583_cast_fp16 = mul(x = var_1581_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1583_cast_fp16")]; + int32 var_1585 = const()[name = string("op_1585"), val = int32(-2)]; + bool var_1586_interleave_0 = const()[name = string("op_1586_interleave_0"), val = bool(false)]; + tensor var_1586_cast_fp16 = concat(axis = var_1585, interleave = var_1586_interleave_0, values = (var_1583_cast_fp16, var_1581_cast_fp16_0))[name = string("op_1586_cast_fp16")]; + tensor var_1587_cast_fp16 = mul(x = var_1586_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_1580_cast_fp16, y = var_1587_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_1569_cast_fp16)[name = string("transpose_2")]; + tensor var_1590_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor var_1591_split_sizes_0 = const()[name = string("op_1591_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1591_axis_0 = const()[name = string("op_1591_axis_0"), val = int32(-1)]; + tensor var_1591_cast_fp16_0, tensor var_1591_cast_fp16_1 = split(axis = var_1591_axis_0, split_sizes = var_1591_split_sizes_0, x = embed_cast_fp16)[name = string("op_1591_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1593_cast_fp16 = mul(x = var_1591_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + int32 var_1595 = const()[name = string("op_1595"), val = int32(-1)]; + bool var_1596_interleave_0 = const()[name = string("op_1596_interleave_0"), val = bool(false)]; + tensor var_1596_cast_fp16 = concat(axis = var_1595, interleave = var_1596_interleave_0, values = (var_1593_cast_fp16, var_1591_cast_fp16_0))[name = string("op_1596_cast_fp16")]; + tensor var_1597_cast_fp16 = mul(x = var_1596_cast_fp16, y = sin_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor key_states_cast_fp16 = add(x = var_1590_cast_fp16, y = var_1597_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor expand_dims_51 = 
const()[name = string("expand_dims_51"), val = tensor([5])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; + tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; + tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_246, concat_44_values3_0))[name = string("concat_44")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_cast_fp16, x = coreml_update_state_20)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache)[name = string("coreml_update_state_58")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_1576_cast_fp16)[name = string("transpose_1")]; + tensor 
value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_cast_fp16, x = coreml_update_state_21)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache)[name = string("coreml_update_state_59")]; + tensor var_1640_begin_0 = const()[name = string("op_1640_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1640_end_0 = const()[name = string("op_1640_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1640_end_mask_0 = const()[name = string("op_1640_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = coreml_update_state_22)[name = string("op_1640_cast_fp16")]; + tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; + int32 var_1643_axis_0 = const()[name = string("op_1643_axis_0"), val = int32(1)]; + tensor var_1643_cast_fp16_0, tensor var_1643_cast_fp16_1 = split(axis = var_1643_axis_0, split_sizes = tile_10, x = var_1640_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = string("op_1650_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = coreml_update_state_23)[name = string("op_1650_cast_fp16")]; + tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; + int32 var_1653_axis_0 = const()[name = string("op_1653_axis_0"), val = int32(1)]; + tensor var_1653_cast_fp16_0, tensor var_1653_cast_fp16_1 = split(axis = var_1653_axis_0, split_sizes = tile_11, x = var_1650_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1656_split_sizes_0 = const()[name = string("op_1656_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1656_axis_0 = const()[name = string("op_1656_axis_0"), val = int32(1)]; + tensor var_1656_cast_fp16_0, tensor var_1656_cast_fp16_1 = split(axis = var_1656_axis_0, split_sizes = var_1656_split_sizes_0, x = query_states_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1643_cast_fp16_0, y = var_1656_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; + fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = 
string("_inversed_attn_weights_83_cast_fp16")]; + tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(2)]; + tensor attn_weights_87_cast_fp16 = softmax(axis = var_1663, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; + bool var_1669_transpose_x_1 = const()[name = string("op_1669_transpose_x_1"), val = bool(true)]; + bool var_1669_transpose_y_1 = const()[name = string("op_1669_transpose_y_1"), val = bool(false)]; + tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_1, transpose_y = var_1669_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1653_cast_fp16_0)[name = string("op_1669_cast_fp16")]; + bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; + bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; + tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1643_cast_fp16_1, y = var_1656_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; + fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; + int32 var_1675 = const()[name = string("op_1675"), val = int32(2)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_1675, x = attn_weights_93_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; + bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_cast_fp16, y = var_1653_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; + int32 var_1683 = const()[name = string("op_1683"), val = int32(1)]; + bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; + tensor attn_output_33_cast_fp16 = concat(axis = var_1683, interleave = attn_output_33_interleave_0, values = (var_1669_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; + tensor var_1687_perm_0 = const()[name = string("op_1687_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 1024, 1, 32])]; + tensor var_1687_cast_fp16 = transpose(perm = var_1687_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_0")]; + tensor x_95_cast_fp16 = reshape(shape = var_1692, x = var_1687_cast_fp16)[name = string("x_95_cast_fp16")]; + string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; + tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; + tensor var_1699_to_fp16 = const()[name = string("op_1699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; + tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1699_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; + int32 var_1711 = const()[name = string("op_1711"), val = int32(1)]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1714_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1714_cast_fp16")]; + bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; + tensor x_99_cast_fp16 = concat(axis = var_1711, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1714_cast_fp16))[name = string("x_99_cast_fp16")]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1724_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; + tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; + tensor var_1730_split_sizes_0 = const()[name = string("op_1730_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1730_axis_0 = const()[name = string("op_1730_axis_0"), val = int32(1)]; + tensor var_1730_cast_fp16_0, tensor var_1730_cast_fp16_1 = split(axis = var_1730_axis_0, split_sizes = var_1730_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1730_cast_fp16")]; + string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; + tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; + tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; + int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; + tensor var_1735_to_fp16 = const()[name = string("op_1735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; + tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_1735_to_fp16, x = var_1730_cast_fp16_0)[name = string("input_cast_fp16")]; + tensor var_1746_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_1746_cast_fp16")]; + string var_1751_pad_type_0 = const()[name = string("op_1751_pad_type_0"), val = string("valid")]; 
+ tensor var_1751_strides_0 = const()[name = string("op_1751_strides_0"), val = tensor([1, 1])]; + tensor var_1751_pad_0 = const()[name = string("op_1751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1751_dilations_0 = const()[name = string("op_1751_dilations_0"), val = tensor([1, 1])]; + int32 var_1751_groups_0 = const()[name = string("op_1751_groups_0"), val = int32(1)]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; + tensor var_1751_cast_fp16 = conv(dilations = var_1751_dilations_0, groups = var_1751_groups_0, pad = var_1751_pad_0, pad_type = var_1751_pad_type_0, strides = var_1751_strides_0, weight = var_1734_to_fp16, x = var_1730_cast_fp16_0)[name = string("op_1751_cast_fp16")]; + tensor x_105_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1751_cast_fp16)[name = string("x_105_cast_fp16")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; + tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_1733_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_107_cast_fp16")]; + int32 var_1764 = const()[name = string("op_1764"), val = int32(1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1767_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1767_cast_fp16")]; + bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; + tensor x_109_cast_fp16 = concat(axis = var_1764, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1767_cast_fp16))[name = string("x_109_cast_fp16")]; + tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; + fp16 var_1777_to_fp16 = const()[name = string("op_1777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1777_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; + tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; + tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_75_cast_fp16")]; + tensor var_1783_split_sizes_0 = const()[name = string("op_1783_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(1)]; + tensor output, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = 
var_1783_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1783_cast_fp16")]; + } -> (output); + func length_64(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { + tensor var_80 = const()[name = string("op_80"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188519296)))]; + tensor position_ids_1 = add(x = var_80, y = position_id)[name = string("position_ids_1")]; + int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; + bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; + tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; + tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; + int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; + bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; + int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; + tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; + tensor var_105 = const()[name = string("op_105"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; + tensor var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor([2])]; + tensor var_112 = expand_dims(axes = var_112_axes_0, x = position_ids_1)[name = string("op_112")]; + tensor var_113 = greater(x = var_105, y = var_112)[name = string("op_113")]; + tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; + string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_113_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_113)[name = string("cast_65")]; + tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_113_to_fp16)[name = string("attention_mask_3_cast_fp16")]; + fp16 var_121_promoted_to_fp16 = const()[name = string("op_121_promoted_to_fp16"), val = fp16(0x0p+0)]; + tensor var_122_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_121_promoted_to_fp16)[name = string("op_122_cast_fp16")]; + tensor 
var_123_after_broadcast_to_fp16 = const()[name = string("op_123_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188519616)))]; + tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_123_after_broadcast_to_fp16, cond = var_122_cast_fp16)[name = string("attention_mask_cast_fp16")]; + tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; + tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; + tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; + int32 var_138 = const()[name = string("op_138"), val = int32(1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_141_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_141_cast_fp16")]; + bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; + tensor x_1_cast_fp16 = concat(axis = var_138, interleave = x_1_interleave_0, values = (inputs_embeds, var_141_cast_fp16))[name = string("x_1_cast_fp16")]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_151_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; + tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; + tensor var_157_split_sizes_0 = const()[name = string("op_157_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_157_axis_0 = const()[name = string("op_157_axis_0"), val = int32(1)]; + tensor var_157_cast_fp16_0, tensor var_157_cast_fp16_1 = split(axis = var_157_axis_0, split_sizes = var_157_split_sizes_0, x = out_3_cast_fp16)[name = string("op_157_cast_fp16")]; + tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([64])]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; + tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_179_to_fp16, x = var_157_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = 
tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; + tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_190_to_fp16, x = var_157_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor var_201_to_fp16 = const()[name = string("op_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; + tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_201_to_fp16, x = var_157_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 16, 64, 64])]; + tensor embed_1_cast_fp16 = reshape(shape = var_209, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 2, 64, 64])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_states_1_cast_fp16)[name = string("op_214_cast_fp16")]; + tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_220 = const()[name = string("op_220"), val = tensor([1, 2, 64, 64])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = value_states_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_20")]; + tensor var_225_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226_split_sizes_0 = const()[name = string("op_226_split_sizes_0"), val = tensor([32, 32])]; + int32 var_226_axis_0 = const()[name = string("op_226_axis_0"), val = int32(-2)]; + tensor var_226_cast_fp16_0, tensor var_226_cast_fp16_1 = split(axis = var_226_axis_0, split_sizes = var_226_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_226_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_228_cast_fp16 = mul(x = var_226_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_228_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-2)]; + bool 
var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_230, interleave = var_231_interleave_0, values = (var_228_cast_fp16, var_226_cast_fp16_0))[name = string("op_231_cast_fp16")]; + tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_19")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_225_cast_fp16, y = var_232_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_214_cast_fp16)[name = string("transpose_17")]; + tensor var_235_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor var_236_split_sizes_0 = const()[name = string("op_236_split_sizes_0"), val = tensor([32, 32])]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(-1)]; + tensor var_236_cast_fp16_0, tensor var_236_cast_fp16_1 = split(axis = var_236_axis_0, split_sizes = var_236_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_236_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_238_cast_fp16 = mul(x = var_236_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_238_cast_fp16")]; + int32 var_240 = const()[name = string("op_240"), val = int32(-1)]; + bool var_241_interleave_0 = const()[name = string("op_241_interleave_0"), val = bool(false)]; + tensor var_241_cast_fp16 = concat(axis = var_240, interleave = var_241_interleave_0, values = (var_238_cast_fp16, var_236_cast_fp16_0))[name = string("op_241_cast_fp16")]; + tensor var_242_cast_fp16 = mul(x = var_241_cast_fp16, y = sin_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_235_cast_fp16, y = var_242_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_246 = add(x = position_id, y = q_len_1)[name = string("op_246")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; + tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; + tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_246, concat_4_values3_0))[name = string("concat_4")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = 
string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache)[name = string("coreml_update_state_60")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_221_cast_fp16)[name = string("transpose_16")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache)[name = string("coreml_update_state_61")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_0 = const()[name = string("op_285_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = coreml_update_state_12)[name = string("op_285_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; + int32 var_288_axis_0 
= const()[name = string("op_288_axis_0"), val = int32(1)]; + tensor var_288_cast_fp16_0, tensor var_288_cast_fp16_1 = split(axis = var_288_axis_0, split_sizes = tile_0, x = var_285_cast_fp16)[name = string("op_288_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = coreml_update_state_13)[name = string("op_295_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; + int32 var_298_axis_0 = const()[name = string("op_298_axis_0"), val = int32(1)]; + tensor var_298_cast_fp16_0, tensor var_298_cast_fp16_1 = split(axis = var_298_axis_0, split_sizes = tile_1, x = var_295_cast_fp16)[name = string("op_298_cast_fp16")]; + tensor var_301_split_sizes_0 = const()[name = string("op_301_split_sizes_0"), val = tensor([8, 8])]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + tensor var_301_cast_fp16_0, tensor var_301_cast_fp16_1 = split(axis = var_301_axis_0, split_sizes = var_301_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_301_cast_fp16")]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_288_cast_fp16_0, y = var_301_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; + fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; + tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_18")]; + tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + int32 var_308 = const()[name = string("op_308"), val = int32(2)]; + tensor attn_weights_7_cast_fp16 = softmax(axis = var_308, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool var_314_transpose_x_1 = const()[name = string("op_314_transpose_x_1"), val = bool(true)]; + bool var_314_transpose_y_1 = const()[name = string("op_314_transpose_y_1"), val = bool(false)]; + tensor var_314_cast_fp16 = matmul(transpose_x = var_314_transpose_x_1, transpose_y = var_314_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_298_cast_fp16_0)[name = string("op_314_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_288_cast_fp16_1, y = var_301_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; + fp16 _inversed_attn_weights_11_y_0_to_fp16 = 
const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; + tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(2)]; + tensor attn_weights_15_cast_fp16 = softmax(axis = var_320, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; + bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_298_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; + int32 var_328 = const()[name = string("op_328"), val = int32(1)]; + bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; + tensor attn_output_3_cast_fp16 = concat(axis = var_328, interleave = attn_output_3_interleave_0, values = (var_314_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; + tensor var_332_perm_0 = const()[name = string("op_332_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 1024, 1, 64])]; + tensor var_332_cast_fp16 = transpose(perm = var_332_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_15")]; + tensor x_5_cast_fp16 = reshape(shape = var_337, x = var_332_cast_fp16)[name = string("x_5_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_344_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; + int32 var_356 = const()[name = string("op_356"), val = int32(1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_359_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_359_cast_fp16")]; + bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; + tensor x_9_cast_fp16 = concat(axis = var_356, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_359_cast_fp16))[name = string("x_9_cast_fp16")]; + tensor 
out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; + tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; + tensor var_375_split_sizes_0 = const()[name = string("op_375_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_375_axis_0 = const()[name = string("op_375_axis_0"), val = int32(1)]; + tensor var_375_cast_fp16_0, tensor var_375_cast_fp16_1 = split(axis = var_375_axis_0, split_sizes = var_375_split_sizes_0, x = out_9_cast_fp16)[name = string("op_375_cast_fp16")]; + string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; + tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; + tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; + int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; + tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_380_to_fp16, x = var_375_cast_fp16_0)[name = string("input_1_cast_fp16")]; + tensor var_391_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_391_cast_fp16")]; + string var_396_pad_type_0 = const()[name = string("op_396_pad_type_0"), val = string("valid")]; + tensor var_396_strides_0 = const()[name = string("op_396_strides_0"), val = tensor([1, 1])]; + tensor var_396_pad_0 = const()[name = string("op_396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_396_dilations_0 = const()[name = string("op_396_dilations_0"), val = tensor([1, 1])]; + int32 var_396_groups_0 = const()[name = string("op_396_groups_0"), val = int32(1)]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; + tensor var_396_cast_fp16 = conv(dilations = var_396_dilations_0, groups = var_396_groups_0, pad = var_396_pad_0, pad_type = var_396_pad_type_0, strides = var_396_strides_0, weight = var_379_to_fp16, x = var_375_cast_fp16_0)[name = string("op_396_cast_fp16")]; + tensor x_15_cast_fp16 = mul(x = var_391_cast_fp16, y = var_396_cast_fp16)[name = string("x_15_cast_fp16")]; + string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; + tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + int32 
hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; + tensor var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; + tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_378_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; + int32 var_409 = const()[name = string("op_409"), val = int32(1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_412_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_412_cast_fp16")]; + bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; + tensor x_19_cast_fp16 = concat(axis = var_409, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_412_cast_fp16))[name = string("x_19_cast_fp16")]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_422_to_fp16 = const()[name = string("op_422_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_422_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; + tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; + tensor var_428_split_sizes_0 = const()[name = string("op_428_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_428_axis_0 = const()[name = string("op_428_axis_0"), val = int32(1)]; + tensor var_428_cast_fp16_0, tensor var_428_cast_fp16_1 = split(axis = var_428_axis_0, split_sizes = var_428_split_sizes_0, x = out_15_cast_fp16)[name = string("op_428_cast_fp16")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; + tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_450_to_fp16, x = var_428_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; + string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; + tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; + 
tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; + int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; + tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_461_to_fp16, x = var_428_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; + string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; + tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; + tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; + int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; + tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_472_to_fp16, x = var_428_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 16, 64, 64])]; + tensor embed_5_cast_fp16 = reshape(shape = var_480, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 2, 64, 64])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = key_states_5_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_491 = const()[name = string("op_491"), val = tensor([1, 2, 64, 64])]; + tensor var_492_cast_fp16 = reshape(shape = var_491, x = value_states_5_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_496_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor var_497_split_sizes_0 = const()[name = string("op_497_split_sizes_0"), val = tensor([32, 32])]; + int32 var_497_axis_0 = const()[name = string("op_497_axis_0"), val = int32(-2)]; + tensor var_497_cast_fp16_0, tensor var_497_cast_fp16_1 = split(axis = var_497_axis_0, split_sizes = var_497_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_497_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_499_cast_fp16")]; + int32 var_501 = const()[name = string("op_501"), val = int32(-2)]; + bool var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; + tensor var_502_cast_fp16 = concat(axis = var_501, 
interleave = var_502_interleave_0, values = (var_499_cast_fp16, var_497_cast_fp16_0))[name = string("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = sin_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_496_cast_fp16, y = var_503_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_485_cast_fp16)[name = string("transpose_14")]; + tensor var_506_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor([32, 32])]; + int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; + tensor var_507_cast_fp16_0, tensor var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_507_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_509_cast_fp16")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + bool var_512_interleave_0 = const()[name = string("op_512_interleave_0"), val = bool(false)]; + tensor var_512_cast_fp16 = concat(axis = var_511, interleave = var_512_interleave_0, values = (var_509_cast_fp16, var_507_cast_fp16_0))[name = string("op_512_cast_fp16")]; + tensor var_513_cast_fp16 = mul(x = var_512_cast_fp16, y = sin_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor key_states_7_cast_fp16 = add(x = var_506_cast_fp16, y = var_513_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_246, concat_12_values3_0))[name = string("concat_12")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = 
string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_12)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache)[name = string("coreml_update_state_62")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_492_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_13)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache)[name = string("coreml_update_state_63")]; + tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_14)[name = string("op_556_cast_fp16")]; + tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; + int32 var_559_axis_0 = const()[name = string("op_559_axis_0"), val = int32(1)]; + tensor var_559_cast_fp16_0, tensor var_559_cast_fp16_1 = split(axis = var_559_axis_0, split_sizes = tile_2, x = var_556_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_566_begin_0 = const()[name = string("op_566_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_566_end_0 = const()[name = string("op_566_end_0"), val = 
tensor([2, 2, 2048, 64])]; + tensor var_566_end_mask_0 = const()[name = string("op_566_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = coreml_update_state_15)[name = string("op_566_cast_fp16")]; + tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; + int32 var_569_axis_0 = const()[name = string("op_569_axis_0"), val = int32(1)]; + tensor var_569_cast_fp16_0, tensor var_569_cast_fp16_1 = split(axis = var_569_axis_0, split_sizes = tile_3, x = var_566_cast_fp16)[name = string("op_569_cast_fp16")]; + tensor var_572_split_sizes_0 = const()[name = string("op_572_split_sizes_0"), val = tensor([8, 8])]; + int32 var_572_axis_0 = const()[name = string("op_572_axis_0"), val = int32(1)]; + tensor var_572_cast_fp16_0, tensor var_572_cast_fp16_1 = split(axis = var_572_axis_0, split_sizes = var_572_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_572_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_559_cast_fp16_0, y = var_572_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; + fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + int32 var_579 = const()[name = string("op_579"), val = int32(2)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_579, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool var_585_transpose_x_1 = const()[name = string("op_585_transpose_x_1"), val = bool(true)]; + bool var_585_transpose_y_1 = const()[name = string("op_585_transpose_y_1"), val = bool(false)]; + tensor var_585_cast_fp16 = matmul(transpose_x = var_585_transpose_x_1, transpose_y = var_585_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_569_cast_fp16_0)[name = string("op_585_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_559_cast_fp16_1, y = var_572_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; + fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; + tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + int32 var_591 = const()[name = string("op_591"), val = int32(2)]; + tensor 
attn_weights_31_cast_fp16 = softmax(axis = var_591, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; + bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_569_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; + int32 var_599 = const()[name = string("op_599"), val = int32(1)]; + bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; + tensor attn_output_9_cast_fp16 = concat(axis = var_599, interleave = attn_output_9_interleave_0, values = (var_585_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; + tensor var_603_perm_0 = const()[name = string("op_603_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_608 = const()[name = string("op_608"), val = tensor([1, 1024, 1, 64])]; + tensor var_603_cast_fp16 = transpose(perm = var_603_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_12")]; + tensor x_23_cast_fp16 = reshape(shape = var_608, x = var_603_cast_fp16)[name = string("x_23_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_615_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_627 = const()[name = string("op_627"), val = int32(1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_630_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_630_cast_fp16")]; + bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; + tensor x_27_cast_fp16 = concat(axis = var_627, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_630_cast_fp16))[name = string("x_27_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_640_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; + tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; + tensor var_646_split_sizes_0 = const()[name = string("op_646_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_646_axis_0 = const()[name = string("op_646_axis_0"), val = int32(1)]; + tensor var_646_cast_fp16_0, tensor var_646_cast_fp16_1 = split(axis = var_646_axis_0, split_sizes = var_646_split_sizes_0, x = out_21_cast_fp16)[name = string("op_646_cast_fp16")]; + string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; + tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; + tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; + int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; + tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_651_to_fp16, x = var_646_cast_fp16_0)[name = string("input_3_cast_fp16")]; + tensor var_662_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_662_cast_fp16")]; + string var_667_pad_type_0 = const()[name = string("op_667_pad_type_0"), val = string("valid")]; + tensor var_667_strides_0 = const()[name = string("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = string("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = string("op_667_dilations_0"), val = tensor([1, 1])]; + int32 var_667_groups_0 = const()[name = string("op_667_groups_0"), val = int32(1)]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = var_650_to_fp16, x = var_646_cast_fp16_0)[name = string("op_667_cast_fp16")]; + tensor x_33_cast_fp16 = mul(x = var_662_cast_fp16, y = var_667_cast_fp16)[name = string("x_33_cast_fp16")]; + string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; + tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; + tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = 
hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_649_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; + int32 var_680 = const()[name = string("op_680"), val = int32(1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_683_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_683_cast_fp16")]; + bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; + tensor x_37_cast_fp16 = concat(axis = var_680, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_683_cast_fp16))[name = string("x_37_cast_fp16")]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_693_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; + tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; + tensor var_699_split_sizes_0 = const()[name = string("op_699_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_699_axis_0 = const()[name = string("op_699_axis_0"), val = int32(1)]; + tensor var_699_cast_fp16_0, tensor var_699_cast_fp16_1 = split(axis = var_699_axis_0, split_sizes = var_699_split_sizes_0, x = out_27_cast_fp16)[name = string("op_699_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; + tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_721_to_fp16, x = var_699_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; + string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; + tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; + tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; + int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; + tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_732_to_fp16, x = var_699_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; + tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_743_to_fp16, x = var_699_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([1, 16, 64, 64])]; + tensor embed_9_cast_fp16 = reshape(shape = var_751, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([1, 2, 64, 64])]; + tensor var_756_cast_fp16 = reshape(shape = var_755, x = key_states_9_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, 2, 64, 64])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = value_states_9_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_767_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_767_cast_fp16")]; + tensor var_768_split_sizes_0 = const()[name = string("op_768_split_sizes_0"), val = tensor([32, 32])]; + int32 var_768_axis_0 = const()[name = string("op_768_axis_0"), val = int32(-2)]; + tensor var_768_cast_fp16_0, tensor var_768_cast_fp16_1 = split(axis = var_768_axis_0, split_sizes = var_768_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_768_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_770_cast_fp16 = mul(x = var_768_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_770_cast_fp16")]; + int32 var_772 = const()[name = string("op_772"), val = int32(-2)]; + bool var_773_interleave_0 = const()[name = string("op_773_interleave_0"), val = bool(false)]; + tensor var_773_cast_fp16 = concat(axis = var_772, interleave = var_773_interleave_0, values = (var_770_cast_fp16, var_768_cast_fp16_0))[name = string("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = mul(x = var_773_cast_fp16, y = sin_1_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_767_cast_fp16, y = var_774_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor embed_11_cast_fp16 = 
transpose(perm = embed_11_perm_0, x = var_756_cast_fp16)[name = string("transpose_11")]; + tensor var_777_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_778_split_sizes_0 = const()[name = string("op_778_split_sizes_0"), val = tensor([32, 32])]; + int32 var_778_axis_0 = const()[name = string("op_778_axis_0"), val = int32(-1)]; + tensor var_778_cast_fp16_0, tensor var_778_cast_fp16_1 = split(axis = var_778_axis_0, split_sizes = var_778_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_778_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_780_cast_fp16 = mul(x = var_778_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_780_cast_fp16")]; + int32 var_782 = const()[name = string("op_782"), val = int32(-1)]; + bool var_783_interleave_0 = const()[name = string("op_783_interleave_0"), val = bool(false)]; + tensor var_783_cast_fp16 = concat(axis = var_782, interleave = var_783_interleave_0, values = (var_780_cast_fp16, var_778_cast_fp16_0))[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_783_cast_fp16, y = sin_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor key_states_11_cast_fp16 = add(x = var_777_cast_fp16, y = var_784_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; + tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; + tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_246, concat_20_values3_0))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = 
concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_14)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache)[name = string("coreml_update_state_64")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_763_cast_fp16)[name = string("transpose_10")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_15)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache)[name = string("coreml_update_state_65")]; + tensor var_827_begin_0 = const()[name = string("op_827_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_827_end_0 = const()[name = string("op_827_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_827_end_mask_0 = const()[name = string("op_827_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_827_cast_fp16 = slice_by_index(begin = var_827_begin_0, end = var_827_end_0, end_mask = var_827_end_mask_0, x = coreml_update_state_16)[name = string("op_827_cast_fp16")]; + tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; + int32 var_830_axis_0 = const()[name = string("op_830_axis_0"), val = int32(1)]; + tensor var_830_cast_fp16_0, tensor var_830_cast_fp16_1 = split(axis = var_830_axis_0, split_sizes = tile_4, x = var_827_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = coreml_update_state_17)[name = string("op_837_cast_fp16")]; + tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; + int32 var_840_axis_0 = 
const()[name = string("op_840_axis_0"), val = int32(1)]; + tensor var_840_cast_fp16_0, tensor var_840_cast_fp16_1 = split(axis = var_840_axis_0, split_sizes = tile_5, x = var_837_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_843_split_sizes_0 = const()[name = string("op_843_split_sizes_0"), val = tensor([8, 8])]; + int32 var_843_axis_0 = const()[name = string("op_843_axis_0"), val = int32(1)]; + tensor var_843_cast_fp16_0, tensor var_843_cast_fp16_1 = split(axis = var_843_axis_0, split_sizes = var_843_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_843_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_830_cast_fp16_0, y = var_843_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; + fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; + tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(2)]; + tensor attn_weights_39_cast_fp16 = softmax(axis = var_850, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; + bool var_856_transpose_x_1 = const()[name = string("op_856_transpose_x_1"), val = bool(true)]; + bool var_856_transpose_y_1 = const()[name = string("op_856_transpose_y_1"), val = bool(false)]; + tensor var_856_cast_fp16 = matmul(transpose_x = var_856_transpose_x_1, transpose_y = var_856_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_840_cast_fp16_0)[name = string("op_856_cast_fp16")]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_830_cast_fp16_1, y = var_843_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; + fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + int32 var_862 = const()[name = string("op_862"), val = int32(2)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_862, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; + bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = 
attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_840_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; + int32 var_870 = const()[name = string("op_870"), val = int32(1)]; + bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; + tensor attn_output_15_cast_fp16 = concat(axis = var_870, interleave = attn_output_15_interleave_0, values = (var_856_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; + tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 1024, 1, 64])]; + tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_9")]; + tensor x_41_cast_fp16 = reshape(shape = var_879, x = var_874_cast_fp16)[name = string("x_41_cast_fp16")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; + tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_886_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_898 = const()[name = string("op_898"), val = int32(1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_901_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_901_cast_fp16")]; + bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; + tensor x_45_cast_fp16 = concat(axis = var_898, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_901_cast_fp16))[name = string("x_45_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_911_to_fp16 = const()[name = string("op_911_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_911_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; + tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_917_axis_0 = const()[name = 
string("op_917_axis_0"), val = int32(1)]; + tensor var_917_cast_fp16_0, tensor var_917_cast_fp16_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = out_33_cast_fp16)[name = string("op_917_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_922_to_fp16, x = var_917_cast_fp16_0)[name = string("input_5_cast_fp16")]; + tensor var_933_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_933_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = var_921_to_fp16, x = var_917_cast_fp16_0)[name = string("op_938_cast_fp16")]; + tensor x_51_cast_fp16 = mul(x = var_933_cast_fp16, y = var_938_cast_fp16)[name = string("x_51_cast_fp16")]; + string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; + tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; + tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; + tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_920_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_951 = const()[name = string("op_951"), val = int32(1)]; + fp16 const_34_promoted_to_fp16 = const()[name = 
string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_954_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_954_cast_fp16")]; + bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; + tensor x_55_cast_fp16 = concat(axis = var_951, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_954_cast_fp16))[name = string("x_55_cast_fp16")]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_964_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; + tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; + tensor var_970_split_sizes_0 = const()[name = string("op_970_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_970_axis_0 = const()[name = string("op_970_axis_0"), val = int32(1)]; + tensor var_970_cast_fp16_0, tensor var_970_cast_fp16_1 = split(axis = var_970_axis_0, split_sizes = var_970_split_sizes_0, x = out_39_cast_fp16)[name = string("op_970_cast_fp16")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; + tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_992_to_fp16, x = var_970_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; + tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1003_to_fp16, x = var_970_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; + 
string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor var_1014_to_fp16 = const()[name = string("op_1014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; + tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1014_to_fp16, x = var_970_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, 16, 64, 64])]; + tensor embed_13_cast_fp16 = reshape(shape = var_1022, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 2, 64, 64])]; + tensor var_1027_cast_fp16 = reshape(shape = var_1026, x = key_states_13_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 2, 64, 64])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = value_states_13_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1038_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_split_sizes_0 = const()[name = string("op_1039_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1039_axis_0 = const()[name = string("op_1039_axis_0"), val = int32(-2)]; + tensor var_1039_cast_fp16_0, tensor var_1039_cast_fp16_1 = split(axis = var_1039_axis_0, split_sizes = var_1039_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1039_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1041_cast_fp16 = mul(x = var_1039_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1041_cast_fp16")]; + int32 var_1043 = const()[name = string("op_1043"), val = int32(-2)]; + bool var_1044_interleave_0 = const()[name = string("op_1044_interleave_0"), val = bool(false)]; + tensor var_1044_cast_fp16 = concat(axis = var_1043, interleave = var_1044_interleave_0, values = (var_1041_cast_fp16, var_1039_cast_fp16_0))[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = mul(x = var_1044_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1045_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1027_cast_fp16)[name = string("transpose_8")]; + tensor var_1048_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1049_split_sizes_0 = const()[name = string("op_1049_split_sizes_0"), val = tensor([32, 
32])]; + int32 var_1049_axis_0 = const()[name = string("op_1049_axis_0"), val = int32(-1)]; + tensor var_1049_cast_fp16_0, tensor var_1049_cast_fp16_1 = split(axis = var_1049_axis_0, split_sizes = var_1049_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1049_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1051_cast_fp16 = mul(x = var_1049_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1051_cast_fp16")]; + int32 var_1053 = const()[name = string("op_1053"), val = int32(-1)]; + bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; + tensor var_1054_cast_fp16 = concat(axis = var_1053, interleave = var_1054_interleave_0, values = (var_1051_cast_fp16, var_1049_cast_fp16_0))[name = string("op_1054_cast_fp16")]; + tensor var_1055_cast_fp16 = mul(x = var_1054_cast_fp16, y = sin_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor key_states_15_cast_fp16 = add(x = var_1048_cast_fp16, y = var_1055_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; + tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; + tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_246, concat_28_values3_0))[name = string("concat_28")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_16)[name = 
string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache)[name = string("coreml_update_state_66")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1034_cast_fp16)[name = string("transpose_7")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_17)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache)[name = string("coreml_update_state_67")]; + tensor var_1098_begin_0 = const()[name = string("op_1098_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = string("op_1098_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1098_end_mask_0 = const()[name = string("op_1098_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = coreml_update_state_18)[name = string("op_1098_cast_fp16")]; + tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(1)]; + tensor var_1101_cast_fp16_0, tensor var_1101_cast_fp16_1 = split(axis = var_1101_axis_0, split_sizes = tile_6, x = var_1098_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = coreml_update_state_19)[name = string("op_1108_cast_fp16")]; + tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(1)]; + tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = tile_7, x = var_1108_cast_fp16)[name = string("op_1111_cast_fp16")]; + 
tensor var_1114_split_sizes_0 = const()[name = string("op_1114_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1114_axis_0 = const()[name = string("op_1114_axis_0"), val = int32(1)]; + tensor var_1114_cast_fp16_0, tensor var_1114_cast_fp16_1 = split(axis = var_1114_axis_0, split_sizes = var_1114_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1101_cast_fp16_0, y = var_1114_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; + fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; + tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(2)]; + tensor attn_weights_55_cast_fp16 = softmax(axis = var_1121, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; + bool var_1127_transpose_x_1 = const()[name = string("op_1127_transpose_x_1"), val = bool(true)]; + bool var_1127_transpose_y_1 = const()[name = string("op_1127_transpose_y_1"), val = bool(false)]; + tensor var_1127_cast_fp16 = matmul(transpose_x = var_1127_transpose_x_1, transpose_y = var_1127_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1111_cast_fp16_0)[name = string("op_1127_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1101_cast_fp16_1, y = var_1114_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; + fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; + tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + int32 var_1133 = const()[name = string("op_1133"), val = int32(2)]; + tensor attn_weights_63_cast_fp16 = softmax(axis = var_1133, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; + bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; + bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1111_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; + int32 var_1141 = const()[name = 
string("op_1141"), val = int32(1)]; + bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = concat(axis = var_1141, interleave = attn_output_21_interleave_0, values = (var_1127_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 1024, 1, 64])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_6")]; + tensor x_59_cast_fp16 = reshape(shape = var_1150, x = var_1145_cast_fp16)[name = string("x_59_cast_fp16")]; + string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; + tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; + tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1157_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; + int32 var_1169 = const()[name = string("op_1169"), val = int32(1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1172_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1172_cast_fp16")]; + bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; + tensor x_63_cast_fp16 = concat(axis = var_1169, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1172_cast_fp16))[name = string("x_63_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_1182_to_fp16 = const()[name = string("op_1182_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1182_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; + tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; + tensor var_1188_split_sizes_0 = const()[name = string("op_1188_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1188_axis_0 = const()[name = string("op_1188_axis_0"), val = int32(1)]; + tensor var_1188_cast_fp16_0, tensor var_1188_cast_fp16_1 = split(axis = var_1188_axis_0, split_sizes = var_1188_split_sizes_0, x = 
out_45_cast_fp16)[name = string("op_1188_cast_fp16")]; + string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; + tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; + int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; + tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1193_to_fp16, x = var_1188_cast_fp16_0)[name = string("input_7_cast_fp16")]; + tensor var_1204_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1204_cast_fp16")]; + string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; + tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; + tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; + int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; + tensor var_1209_cast_fp16 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = var_1192_to_fp16, x = var_1188_cast_fp16_0)[name = string("op_1209_cast_fp16")]; + tensor x_69_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1209_cast_fp16)[name = string("x_69_cast_fp16")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; + tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1191_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; + int32 var_1222 = const()[name = string("op_1222"), val = int32(1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1225_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = 
string("op_1225_cast_fp16")]; + bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; + tensor x_73_cast_fp16 = concat(axis = var_1222, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1225_cast_fp16))[name = string("x_73_cast_fp16")]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_1235_to_fp16 = const()[name = string("op_1235_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1235_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; + tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; + tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(1)]; + tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1241_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; + tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1263_to_fp16, x = var_1241_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; + string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; + tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; + tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; + int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; + tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1274_to_fp16, x = var_1241_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor 
value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; + tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1285_to_fp16, x = var_1241_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 16, 64, 64])]; + tensor embed_17_cast_fp16 = reshape(shape = var_1293, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 2, 64, 64])]; + tensor var_1298_cast_fp16 = reshape(shape = var_1297, x = key_states_17_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 2, 64, 64])]; + tensor var_1305_cast_fp16 = reshape(shape = var_1304, x = value_states_17_cast_fp16)[name = string("op_1305_cast_fp16")]; + tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1309_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310_split_sizes_0 = const()[name = string("op_1310_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1310_axis_0 = const()[name = string("op_1310_axis_0"), val = int32(-2)]; + tensor var_1310_cast_fp16_0, tensor var_1310_cast_fp16_1 = split(axis = var_1310_axis_0, split_sizes = var_1310_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1310_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1312_cast_fp16 = mul(x = var_1310_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1312_cast_fp16")]; + int32 var_1314 = const()[name = string("op_1314"), val = int32(-2)]; + bool var_1315_interleave_0 = const()[name = string("op_1315_interleave_0"), val = bool(false)]; + tensor var_1315_cast_fp16 = concat(axis = var_1314, interleave = var_1315_interleave_0, values = (var_1312_cast_fp16, var_1310_cast_fp16_0))[name = string("op_1315_cast_fp16")]; + tensor var_1316_cast_fp16 = mul(x = var_1315_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_1309_cast_fp16, y = var_1316_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1298_cast_fp16)[name = string("transpose_5")]; + tensor var_1319_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor var_1320_split_sizes_0 = const()[name = string("op_1320_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1320_axis_0 = const()[name = string("op_1320_axis_0"), val = int32(-1)]; + tensor var_1320_cast_fp16_0, 
tensor var_1320_cast_fp16_1 = split(axis = var_1320_axis_0, split_sizes = var_1320_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1320_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1322_cast_fp16 = mul(x = var_1320_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 var_1324 = const()[name = string("op_1324"), val = int32(-1)]; + bool var_1325_interleave_0 = const()[name = string("op_1325_interleave_0"), val = bool(false)]; + tensor var_1325_cast_fp16 = concat(axis = var_1324, interleave = var_1325_interleave_0, values = (var_1322_cast_fp16, var_1320_cast_fp16_0))[name = string("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = mul(x = var_1325_cast_fp16, y = sin_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor key_states_19_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1326_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; + tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_246, concat_36_values3_0))[name = string("concat_36")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_18)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = 
key_cache)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache)[name = string("coreml_update_state_68")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1305_cast_fp16)[name = string("transpose_4")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_19)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache)[name = string("coreml_update_state_69")]; + tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1369_end_0 = const()[name = string("op_1369_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = coreml_update_state_20)[name = string("op_1369_cast_fp16")]; + tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; + int32 var_1372_axis_0 = const()[name = string("op_1372_axis_0"), val = int32(1)]; + tensor var_1372_cast_fp16_0, tensor var_1372_cast_fp16_1 = split(axis = var_1372_axis_0, split_sizes = tile_8, x = var_1369_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = string("op_1379_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = coreml_update_state_21)[name = string("op_1379_cast_fp16")]; + tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; + int32 var_1382_axis_0 = const()[name = string("op_1382_axis_0"), val = int32(1)]; + tensor var_1382_cast_fp16_0, tensor var_1382_cast_fp16_1 = split(axis = var_1382_axis_0, split_sizes = tile_9, x = var_1379_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1385_split_sizes_0 = const()[name = string("op_1385_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1385_axis_0 = 
const()[name = string("op_1385_axis_0"), val = int32(1)]; + tensor var_1385_cast_fp16_0, tensor var_1385_cast_fp16_1 = split(axis = var_1385_axis_0, split_sizes = var_1385_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1385_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1372_cast_fp16_0, y = var_1385_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; + fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + int32 var_1392 = const()[name = string("op_1392"), val = int32(2)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_1392, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool var_1398_transpose_x_1 = const()[name = string("op_1398_transpose_x_1"), val = bool(true)]; + bool var_1398_transpose_y_1 = const()[name = string("op_1398_transpose_y_1"), val = bool(false)]; + tensor var_1398_cast_fp16 = matmul(transpose_x = var_1398_transpose_x_1, transpose_y = var_1398_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1382_cast_fp16_0)[name = string("op_1398_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1372_cast_fp16_1, y = var_1385_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; + fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; + tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + int32 var_1404 = const()[name = string("op_1404"), val = int32(2)]; + tensor attn_weights_79_cast_fp16 = softmax(axis = var_1404, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; + bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; + bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1382_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; + int32 var_1412 = const()[name = string("op_1412"), val = int32(1)]; + bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = 
bool(false)]; + tensor attn_output_27_cast_fp16 = concat(axis = var_1412, interleave = attn_output_27_interleave_0, values = (var_1398_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; + tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1024, 1, 64])]; + tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_3")]; + tensor x_77_cast_fp16 = reshape(shape = var_1421, x = var_1416_cast_fp16)[name = string("x_77_cast_fp16")]; + string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; + tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; + tensor var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; + tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1428_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(1)]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1443_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1443_cast_fp16")]; + bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; + tensor x_81_cast_fp16 = concat(axis = var_1440, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1443_cast_fp16))[name = string("x_81_cast_fp16")]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1453_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; + tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; + tensor var_1459_split_sizes_0 = const()[name = string("op_1459_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1459_axis_0 = const()[name = string("op_1459_axis_0"), val = int32(1)]; + tensor var_1459_cast_fp16_0, tensor var_1459_cast_fp16_1 = split(axis = var_1459_axis_0, split_sizes = var_1459_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1459_cast_fp16")]; + string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = 
string("valid")]; + tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; + tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; + int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; + tensor var_1464_to_fp16 = const()[name = string("op_1464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; + tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1464_to_fp16, x = var_1459_cast_fp16_0)[name = string("input_9_cast_fp16")]; + tensor var_1475_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1475_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = var_1463_to_fp16, x = var_1459_cast_fp16_0)[name = string("op_1480_cast_fp16")]; + tensor x_87_cast_fp16 = mul(x = var_1475_cast_fp16, y = var_1480_cast_fp16)[name = string("x_87_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor var_1462_to_fp16 = const()[name = string("op_1462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1462_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_1493 = const()[name = string("op_1493"), val = int32(1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1496_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1496_cast_fp16")]; + bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; + tensor x_91_cast_fp16 
= concat(axis = var_1493, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1496_cast_fp16))[name = string("x_91_cast_fp16")]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1506_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; + tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; + tensor var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(1)]; + tensor var_1512_cast_fp16_0, tensor var_1512_cast_fp16_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1512_cast_fp16")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor var_1534_to_fp16 = const()[name = string("op_1534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; + tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1534_to_fp16, x = var_1512_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; + tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1545_to_fp16, x = var_1512_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; + string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; + tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; + tensor value_states_21_pad_0 = const()[name = 
string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; + int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; + tensor var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; + tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1556_to_fp16, x = var_1512_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 16, 64, 64])]; + tensor embed_21_cast_fp16 = reshape(shape = var_1564, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; + tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 2, 64, 64])]; + tensor var_1569_cast_fp16 = reshape(shape = var_1568, x = key_states_21_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 2, 64, 64])]; + tensor var_1576_cast_fp16 = reshape(shape = var_1575, x = value_states_21_cast_fp16)[name = string("op_1576_cast_fp16")]; + tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1580_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1580_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-2)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1581_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1583_cast_fp16 = mul(x = var_1581_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1583_cast_fp16")]; + int32 var_1585 = const()[name = string("op_1585"), val = int32(-2)]; + bool var_1586_interleave_0 = const()[name = string("op_1586_interleave_0"), val = bool(false)]; + tensor var_1586_cast_fp16 = concat(axis = var_1585, interleave = var_1586_interleave_0, values = (var_1583_cast_fp16, var_1581_cast_fp16_0))[name = string("op_1586_cast_fp16")]; + tensor var_1587_cast_fp16 = mul(x = var_1586_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_1580_cast_fp16, y = var_1587_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_1569_cast_fp16)[name = string("transpose_2")]; + tensor var_1590_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor var_1591_split_sizes_0 = const()[name = string("op_1591_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1591_axis_0 = const()[name = string("op_1591_axis_0"), val = int32(-1)]; + tensor var_1591_cast_fp16_0, tensor var_1591_cast_fp16_1 = split(axis = var_1591_axis_0, split_sizes = var_1591_split_sizes_0, x = embed_cast_fp16)[name = string("op_1591_cast_fp16")]; + fp16 
const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1593_cast_fp16 = mul(x = var_1591_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + int32 var_1595 = const()[name = string("op_1595"), val = int32(-1)]; + bool var_1596_interleave_0 = const()[name = string("op_1596_interleave_0"), val = bool(false)]; + tensor var_1596_cast_fp16 = concat(axis = var_1595, interleave = var_1596_interleave_0, values = (var_1593_cast_fp16, var_1591_cast_fp16_0))[name = string("op_1596_cast_fp16")]; + tensor var_1597_cast_fp16 = mul(x = var_1596_cast_fp16, y = sin_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor key_states_cast_fp16 = add(x = var_1590_cast_fp16, y = var_1597_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; + tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; + tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_246, concat_44_values3_0))[name = string("concat_44")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_cast_fp16, x = coreml_update_state_20)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache)[name = string("coreml_update_state_70")]; + tensor 
value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_1576_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_cast_fp16, x = coreml_update_state_21)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache)[name = string("coreml_update_state_71")]; + tensor var_1640_begin_0 = const()[name = string("op_1640_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1640_end_0 = const()[name = string("op_1640_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1640_end_mask_0 = const()[name = string("op_1640_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = coreml_update_state_22)[name = string("op_1640_cast_fp16")]; + tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; + int32 var_1643_axis_0 = const()[name = string("op_1643_axis_0"), val = int32(1)]; + tensor var_1643_cast_fp16_0, tensor var_1643_cast_fp16_1 = split(axis = var_1643_axis_0, split_sizes = tile_10, x = var_1640_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = string("op_1650_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = coreml_update_state_23)[name = string("op_1650_cast_fp16")]; + tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; + int32 var_1653_axis_0 = const()[name = string("op_1653_axis_0"), val = int32(1)]; + tensor var_1653_cast_fp16_0, tensor var_1653_cast_fp16_1 = split(axis = var_1653_axis_0, split_sizes = tile_11, x = var_1650_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1656_split_sizes_0 = const()[name = string("op_1656_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1656_axis_0 = const()[name = string("op_1656_axis_0"), val = int32(1)]; + tensor var_1656_cast_fp16_0, tensor var_1656_cast_fp16_1 = split(axis = var_1656_axis_0, split_sizes = var_1656_split_sizes_0, x = 
query_states_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1643_cast_fp16_0, y = var_1656_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; + fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; + tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(2)]; + tensor attn_weights_87_cast_fp16 = softmax(axis = var_1663, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; + bool var_1669_transpose_x_1 = const()[name = string("op_1669_transpose_x_1"), val = bool(true)]; + bool var_1669_transpose_y_1 = const()[name = string("op_1669_transpose_y_1"), val = bool(false)]; + tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_1, transpose_y = var_1669_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1653_cast_fp16_0)[name = string("op_1669_cast_fp16")]; + bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; + bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; + tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1643_cast_fp16_1, y = var_1656_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; + fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; + int32 var_1675 = const()[name = string("op_1675"), val = int32(2)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_1675, x = attn_weights_93_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; + bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_cast_fp16, y = var_1653_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; + int32 var_1683 = const()[name = string("op_1683"), val = int32(1)]; + bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; + tensor attn_output_33_cast_fp16 = concat(axis = var_1683, interleave = attn_output_33_interleave_0, values = (var_1669_cast_fp16, attn_output_31_cast_fp16))[name = 
string("attn_output_33_cast_fp16")]; + tensor var_1687_perm_0 = const()[name = string("op_1687_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 1024, 1, 64])]; + tensor var_1687_cast_fp16 = transpose(perm = var_1687_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_0")]; + tensor x_95_cast_fp16 = reshape(shape = var_1692, x = var_1687_cast_fp16)[name = string("x_95_cast_fp16")]; + string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; + tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; + tensor var_1699_to_fp16 = const()[name = string("op_1699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; + tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1699_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; + int32 var_1711 = const()[name = string("op_1711"), val = int32(1)]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1714_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1714_cast_fp16")]; + bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; + tensor x_99_cast_fp16 = concat(axis = var_1711, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1714_cast_fp16))[name = string("x_99_cast_fp16")]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1724_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; + tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; + tensor var_1730_split_sizes_0 = const()[name = string("op_1730_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1730_axis_0 = const()[name = string("op_1730_axis_0"), val = int32(1)]; + tensor var_1730_cast_fp16_0, tensor var_1730_cast_fp16_1 = split(axis = var_1730_axis_0, split_sizes = var_1730_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1730_cast_fp16")]; + string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; + tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; + tensor input_pad_0 = const()[name = string("input_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; + int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; + tensor var_1735_to_fp16 = const()[name = string("op_1735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; + tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_1735_to_fp16, x = var_1730_cast_fp16_0)[name = string("input_cast_fp16")]; + tensor var_1746_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_1746_cast_fp16")]; + string var_1751_pad_type_0 = const()[name = string("op_1751_pad_type_0"), val = string("valid")]; + tensor var_1751_strides_0 = const()[name = string("op_1751_strides_0"), val = tensor([1, 1])]; + tensor var_1751_pad_0 = const()[name = string("op_1751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1751_dilations_0 = const()[name = string("op_1751_dilations_0"), val = tensor([1, 1])]; + int32 var_1751_groups_0 = const()[name = string("op_1751_groups_0"), val = int32(1)]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; + tensor var_1751_cast_fp16 = conv(dilations = var_1751_dilations_0, groups = var_1751_groups_0, pad = var_1751_pad_0, pad_type = var_1751_pad_type_0, strides = var_1751_strides_0, weight = var_1734_to_fp16, x = var_1730_cast_fp16_0)[name = string("op_1751_cast_fp16")]; + tensor x_105_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1751_cast_fp16)[name = string("x_105_cast_fp16")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; + tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_1733_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_107_cast_fp16")]; + int32 var_1764 = const()[name = string("op_1764"), val = int32(1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1767_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1767_cast_fp16")]; + bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; + tensor x_109_cast_fp16 = concat(axis = var_1764, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1767_cast_fp16))[name = string("x_109_cast_fp16")]; + tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; + fp16 var_1777_to_fp16 = 
const()[name = string("op_1777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1777_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; + tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; + tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_75_cast_fp16")]; + tensor var_1783_split_sizes_0 = const()[name = string("op_1783_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(1)]; + tensor output, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = var_1783_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1783_cast_fp16")]; + } -> (output); + func length_8(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { + tensor var_80 = const()[name = string("op_80"), val = tensor([[0, 1, 2, 3, 4, 5, 6, 7]])]; + tensor position_ids_1 = add(x = var_80, y = position_id)[name = string("position_ids_1")]; + int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; + bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; + tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; + tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; + int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; + bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; + tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; + int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; + tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; + tensor var_105 = const()[name = string("op_105"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; + tensor var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor([2])]; + tensor var_112 = expand_dims(axes = var_112_axes_0, x = position_ids_1)[name = string("op_112")]; + tensor var_113 = greater(x = var_105, y = var_112)[name = string("op_113")]; + tensor 
attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; + string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_113_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_113)[name = string("cast_65")]; + tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_113_to_fp16)[name = string("attention_mask_3_cast_fp16")]; + fp16 var_121_promoted_to_fp16 = const()[name = string("op_121_promoted_to_fp16"), val = fp16(0x0p+0)]; + tensor var_122_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_121_promoted_to_fp16)[name = string("op_122_cast_fp16")]; + tensor var_123_after_broadcast_to_fp16 = const()[name = string("op_123_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188289408)))]; + tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_123_after_broadcast_to_fp16, cond = var_122_cast_fp16)[name = string("attention_mask_cast_fp16")]; + tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; + tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; + tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; + int32 var_138 = const()[name = string("op_138"), val = int32(1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_141_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_141_cast_fp16")]; + bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; + tensor x_1_cast_fp16 = concat(axis = var_138, interleave = x_1_interleave_0, values = (inputs_embeds, var_141_cast_fp16))[name = string("x_1_cast_fp16")]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_151_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; + tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; + tensor var_157_split_sizes_0 = const()[name = string("op_157_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_157_axis_0 = const()[name = string("op_157_axis_0"), val = int32(1)]; + tensor var_157_cast_fp16_0, tensor var_157_cast_fp16_1 = split(axis = var_157_axis_0, split_sizes = var_157_split_sizes_0, x = out_3_cast_fp16)[name = string("op_157_cast_fp16")]; + tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([8])]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = 
string("query_states_1_groups_0"), val = int32(1)]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; + tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_179_to_fp16, x = var_157_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; + tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_190_to_fp16, x = var_157_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor var_201_to_fp16 = const()[name = string("op_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; + tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_201_to_fp16, x = var_157_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 16, 64, 8])]; + tensor embed_1_cast_fp16 = reshape(shape = var_209, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 2, 64, 8])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_states_1_cast_fp16)[name = string("op_214_cast_fp16")]; + tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_220 = const()[name = string("op_220"), val = tensor([1, 2, 64, 8])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = value_states_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_20")]; + tensor var_225_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name 
= string("op_225_cast_fp16")]; + tensor var_226_split_sizes_0 = const()[name = string("op_226_split_sizes_0"), val = tensor([32, 32])]; + int32 var_226_axis_0 = const()[name = string("op_226_axis_0"), val = int32(-2)]; + tensor var_226_cast_fp16_0, tensor var_226_cast_fp16_1 = split(axis = var_226_axis_0, split_sizes = var_226_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_226_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_228_cast_fp16 = mul(x = var_226_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_228_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-2)]; + bool var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_230, interleave = var_231_interleave_0, values = (var_228_cast_fp16, var_226_cast_fp16_0))[name = string("op_231_cast_fp16")]; + tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_19")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_225_cast_fp16, y = var_232_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_214_cast_fp16)[name = string("transpose_17")]; + tensor var_235_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor var_236_split_sizes_0 = const()[name = string("op_236_split_sizes_0"), val = tensor([32, 32])]; + int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(-1)]; + tensor var_236_cast_fp16_0, tensor var_236_cast_fp16_1 = split(axis = var_236_axis_0, split_sizes = var_236_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_236_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_238_cast_fp16 = mul(x = var_236_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_238_cast_fp16")]; + int32 var_240 = const()[name = string("op_240"), val = int32(-1)]; + bool var_241_interleave_0 = const()[name = string("op_241_interleave_0"), val = bool(false)]; + tensor var_241_cast_fp16 = concat(axis = var_240, interleave = var_241_interleave_0, values = (var_238_cast_fp16, var_236_cast_fp16_0))[name = string("op_241_cast_fp16")]; + tensor var_242_cast_fp16 = mul(x = var_241_cast_fp16, y = sin_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor key_states_3_cast_fp16 = add(x = var_235_cast_fp16, y = var_242_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_246 = add(x = position_id, y = q_len_1)[name = string("op_246")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, 
expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; + tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; + tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_246, concat_4_values3_0))[name = string("concat_4")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache)[name = string("coreml_update_state_24")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_221_cast_fp16)[name = string("transpose_16")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = 
string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache)[name = string("coreml_update_state_25")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_0 = const()[name = string("op_285_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = coreml_update_state_12)[name = string("op_285_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; + int32 var_288_axis_0 = const()[name = string("op_288_axis_0"), val = int32(1)]; + tensor var_288_cast_fp16_0, tensor var_288_cast_fp16_1 = split(axis = var_288_axis_0, split_sizes = tile_0, x = var_285_cast_fp16)[name = string("op_288_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = coreml_update_state_13)[name = string("op_295_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; + int32 var_298_axis_0 = const()[name = string("op_298_axis_0"), val = int32(1)]; + tensor var_298_cast_fp16_0, tensor var_298_cast_fp16_1 = split(axis = var_298_axis_0, split_sizes = tile_1, x = var_295_cast_fp16)[name = string("op_298_cast_fp16")]; + tensor var_301_split_sizes_0 = const()[name = string("op_301_split_sizes_0"), val = tensor([8, 8])]; + int32 var_301_axis_0 = const()[name = string("op_301_axis_0"), val = int32(1)]; + tensor var_301_cast_fp16_0, tensor var_301_cast_fp16_1 = split(axis = var_301_axis_0, split_sizes = var_301_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_301_cast_fp16")]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_288_cast_fp16_0, y = var_301_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; + fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; + tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_18")]; + tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + int32 var_308 = const()[name = string("op_308"), val = int32(2)]; + tensor attn_weights_7_cast_fp16 = softmax(axis = var_308, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool var_314_transpose_x_1 = const()[name = string("op_314_transpose_x_1"), val = bool(true)]; + bool var_314_transpose_y_1 = const()[name = 
string("op_314_transpose_y_1"), val = bool(false)]; + tensor var_314_cast_fp16 = matmul(transpose_x = var_314_transpose_x_1, transpose_y = var_314_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_298_cast_fp16_0)[name = string("op_314_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_288_cast_fp16_1, y = var_301_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; + fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; + tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(2)]; + tensor attn_weights_15_cast_fp16 = softmax(axis = var_320, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; + bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_298_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; + int32 var_328 = const()[name = string("op_328"), val = int32(1)]; + bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; + tensor attn_output_3_cast_fp16 = concat(axis = var_328, interleave = attn_output_3_interleave_0, values = (var_314_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; + tensor var_332_perm_0 = const()[name = string("op_332_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_337 = const()[name = string("op_337"), val = tensor([1, 1024, 1, 8])]; + tensor var_332_cast_fp16 = transpose(perm = var_332_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_15")]; + tensor x_5_cast_fp16 = reshape(shape = var_337, x = var_332_cast_fp16)[name = string("x_5_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, 
weight = var_344_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; + int32 var_356 = const()[name = string("op_356"), val = int32(1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_359_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_359_cast_fp16")]; + bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; + tensor x_9_cast_fp16 = concat(axis = var_356, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_359_cast_fp16))[name = string("x_9_cast_fp16")]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; + tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; + tensor var_375_split_sizes_0 = const()[name = string("op_375_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_375_axis_0 = const()[name = string("op_375_axis_0"), val = int32(1)]; + tensor var_375_cast_fp16_0, tensor var_375_cast_fp16_1 = split(axis = var_375_axis_0, split_sizes = var_375_split_sizes_0, x = out_9_cast_fp16)[name = string("op_375_cast_fp16")]; + string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; + tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; + tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; + int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; + tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_380_to_fp16, x = var_375_cast_fp16_0)[name = string("input_1_cast_fp16")]; + tensor var_391_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_391_cast_fp16")]; + string var_396_pad_type_0 = const()[name = string("op_396_pad_type_0"), val = string("valid")]; + tensor var_396_strides_0 = const()[name = string("op_396_strides_0"), val = tensor([1, 1])]; + tensor var_396_pad_0 = const()[name = string("op_396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_396_dilations_0 = const()[name = string("op_396_dilations_0"), val = tensor([1, 1])]; + int32 var_396_groups_0 = const()[name = string("op_396_groups_0"), val = int32(1)]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; + tensor var_396_cast_fp16 = conv(dilations = var_396_dilations_0, groups = 
var_396_groups_0, pad = var_396_pad_0, pad_type = var_396_pad_type_0, strides = var_396_strides_0, weight = var_379_to_fp16, x = var_375_cast_fp16_0)[name = string("op_396_cast_fp16")]; + tensor x_15_cast_fp16 = mul(x = var_391_cast_fp16, y = var_396_cast_fp16)[name = string("x_15_cast_fp16")]; + string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; + tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; + tensor var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; + tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_378_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; + int32 var_409 = const()[name = string("op_409"), val = int32(1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_412_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_412_cast_fp16")]; + bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; + tensor x_19_cast_fp16 = concat(axis = var_409, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_412_cast_fp16))[name = string("x_19_cast_fp16")]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_422_to_fp16 = const()[name = string("op_422_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_422_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; + tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; + tensor var_428_split_sizes_0 = const()[name = string("op_428_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_428_axis_0 = const()[name = string("op_428_axis_0"), val = int32(1)]; + tensor var_428_cast_fp16_0, tensor var_428_cast_fp16_1 = split(axis = var_428_axis_0, split_sizes = var_428_split_sizes_0, x = out_15_cast_fp16)[name = string("op_428_cast_fp16")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name 
= string("query_states_5_groups_0"), val = int32(1)]; + tensor var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; + tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_450_to_fp16, x = var_428_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; + string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; + tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; + tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; + int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; + tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_461_to_fp16, x = var_428_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; + string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; + tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; + tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; + int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; + tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_472_to_fp16, x = var_428_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 16, 64, 8])]; + tensor embed_5_cast_fp16 = reshape(shape = var_480, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 2, 64, 8])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = key_states_5_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_491 = const()[name = string("op_491"), val = tensor([1, 2, 64, 8])]; + tensor var_492_cast_fp16 = reshape(shape = var_491, x = value_states_5_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_496_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor var_497_split_sizes_0 = const()[name = 
string("op_497_split_sizes_0"), val = tensor([32, 32])]; + int32 var_497_axis_0 = const()[name = string("op_497_axis_0"), val = int32(-2)]; + tensor var_497_cast_fp16_0, tensor var_497_cast_fp16_1 = split(axis = var_497_axis_0, split_sizes = var_497_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_497_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_499_cast_fp16")]; + int32 var_501 = const()[name = string("op_501"), val = int32(-2)]; + bool var_502_interleave_0 = const()[name = string("op_502_interleave_0"), val = bool(false)]; + tensor var_502_cast_fp16 = concat(axis = var_501, interleave = var_502_interleave_0, values = (var_499_cast_fp16, var_497_cast_fp16_0))[name = string("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = sin_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_496_cast_fp16, y = var_503_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_485_cast_fp16)[name = string("transpose_14")]; + tensor var_506_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor([32, 32])]; + int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; + tensor var_507_cast_fp16_0, tensor var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_507_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_509_cast_fp16")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + bool var_512_interleave_0 = const()[name = string("op_512_interleave_0"), val = bool(false)]; + tensor var_512_cast_fp16 = concat(axis = var_511, interleave = var_512_interleave_0, values = (var_509_cast_fp16, var_507_cast_fp16_0))[name = string("op_512_cast_fp16")]; + tensor var_513_cast_fp16 = mul(x = var_512_cast_fp16, y = sin_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor key_states_7_cast_fp16 = add(x = var_506_cast_fp16, y = var_513_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = 
string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_246, concat_12_values3_0))[name = string("concat_12")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_12)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache)[name = string("coreml_update_state_26")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_492_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_13)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache)[name = string("coreml_update_state_27")]; + tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_556_end_mask_0 = const()[name = 
string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_14)[name = string("op_556_cast_fp16")]; + tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; + int32 var_559_axis_0 = const()[name = string("op_559_axis_0"), val = int32(1)]; + tensor var_559_cast_fp16_0, tensor var_559_cast_fp16_1 = split(axis = var_559_axis_0, split_sizes = tile_2, x = var_556_cast_fp16)[name = string("op_559_cast_fp16")]; + tensor var_566_begin_0 = const()[name = string("op_566_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_566_end_0 = const()[name = string("op_566_end_0"), val = tensor([2, 2, 2048, 64])]; + tensor var_566_end_mask_0 = const()[name = string("op_566_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = coreml_update_state_15)[name = string("op_566_cast_fp16")]; + tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; + int32 var_569_axis_0 = const()[name = string("op_569_axis_0"), val = int32(1)]; + tensor var_569_cast_fp16_0, tensor var_569_cast_fp16_1 = split(axis = var_569_axis_0, split_sizes = tile_3, x = var_566_cast_fp16)[name = string("op_569_cast_fp16")]; + tensor var_572_split_sizes_0 = const()[name = string("op_572_split_sizes_0"), val = tensor([8, 8])]; + int32 var_572_axis_0 = const()[name = string("op_572_axis_0"), val = int32(1)]; + tensor var_572_cast_fp16_0, tensor var_572_cast_fp16_1 = split(axis = var_572_axis_0, split_sizes = var_572_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_572_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_559_cast_fp16_0, y = var_572_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; + fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + int32 var_579 = const()[name = string("op_579"), val = int32(2)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_579, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool var_585_transpose_x_1 = const()[name = string("op_585_transpose_x_1"), val = bool(true)]; + bool var_585_transpose_y_1 = const()[name = string("op_585_transpose_y_1"), val = bool(false)]; + tensor var_585_cast_fp16 = matmul(transpose_x = var_585_transpose_x_1, transpose_y = var_585_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_569_cast_fp16_0)[name = string("op_585_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor 
attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_559_cast_fp16_1, y = var_572_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; + fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; + tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + int32 var_591 = const()[name = string("op_591"), val = int32(2)]; + tensor attn_weights_31_cast_fp16 = softmax(axis = var_591, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; + bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; + bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_569_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; + int32 var_599 = const()[name = string("op_599"), val = int32(1)]; + bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; + tensor attn_output_9_cast_fp16 = concat(axis = var_599, interleave = attn_output_9_interleave_0, values = (var_585_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; + tensor var_603_perm_0 = const()[name = string("op_603_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_608 = const()[name = string("op_608"), val = tensor([1, 1024, 1, 8])]; + tensor var_603_cast_fp16 = transpose(perm = var_603_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_12")]; + tensor x_23_cast_fp16 = reshape(shape = var_608, x = var_603_cast_fp16)[name = string("x_23_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_615_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_627 = const()[name = string("op_627"), val = int32(1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_630_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = 
string("op_630_cast_fp16")]; + bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; + tensor x_27_cast_fp16 = concat(axis = var_627, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_630_cast_fp16))[name = string("x_27_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_640_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; + tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; + tensor var_646_split_sizes_0 = const()[name = string("op_646_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_646_axis_0 = const()[name = string("op_646_axis_0"), val = int32(1)]; + tensor var_646_cast_fp16_0, tensor var_646_cast_fp16_1 = split(axis = var_646_axis_0, split_sizes = var_646_split_sizes_0, x = out_21_cast_fp16)[name = string("op_646_cast_fp16")]; + string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; + tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; + tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; + int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; + tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_651_to_fp16, x = var_646_cast_fp16_0)[name = string("input_3_cast_fp16")]; + tensor var_662_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_662_cast_fp16")]; + string var_667_pad_type_0 = const()[name = string("op_667_pad_type_0"), val = string("valid")]; + tensor var_667_strides_0 = const()[name = string("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = string("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = string("op_667_dilations_0"), val = tensor([1, 1])]; + int32 var_667_groups_0 = const()[name = string("op_667_groups_0"), val = int32(1)]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = var_650_to_fp16, x = var_646_cast_fp16_0)[name = string("op_667_cast_fp16")]; + tensor x_33_cast_fp16 = mul(x = var_662_cast_fp16, y = var_667_cast_fp16)[name = string("x_33_cast_fp16")]; + string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; + tensor hidden_states_11_strides_0 = const()[name = 
string("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; + tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_649_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; + int32 var_680 = const()[name = string("op_680"), val = int32(1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_683_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_683_cast_fp16")]; + bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; + tensor x_37_cast_fp16 = concat(axis = var_680, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_683_cast_fp16))[name = string("x_37_cast_fp16")]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_693_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; + tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; + tensor var_699_split_sizes_0 = const()[name = string("op_699_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_699_axis_0 = const()[name = string("op_699_axis_0"), val = int32(1)]; + tensor var_699_cast_fp16_0, tensor var_699_cast_fp16_1 = split(axis = var_699_axis_0, split_sizes = var_699_split_sizes_0, x = out_27_cast_fp16)[name = string("op_699_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; + tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight 
= var_721_to_fp16, x = var_699_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; + string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; + tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; + tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; + int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; + tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_732_to_fp16, x = var_699_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; + tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_743_to_fp16, x = var_699_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([1, 16, 64, 8])]; + tensor embed_9_cast_fp16 = reshape(shape = var_751, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([1, 2, 64, 8])]; + tensor var_756_cast_fp16 = reshape(shape = var_755, x = key_states_9_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_762 = const()[name = string("op_762"), val = tensor([1, 2, 64, 8])]; + tensor var_763_cast_fp16 = reshape(shape = var_762, x = value_states_9_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_767_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_767_cast_fp16")]; + tensor var_768_split_sizes_0 = const()[name = string("op_768_split_sizes_0"), val = tensor([32, 32])]; + int32 var_768_axis_0 = const()[name = string("op_768_axis_0"), val = int32(-2)]; + tensor var_768_cast_fp16_0, tensor var_768_cast_fp16_1 = split(axis = var_768_axis_0, split_sizes = var_768_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_768_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_770_cast_fp16 = mul(x = 
var_768_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_770_cast_fp16")]; + int32 var_772 = const()[name = string("op_772"), val = int32(-2)]; + bool var_773_interleave_0 = const()[name = string("op_773_interleave_0"), val = bool(false)]; + tensor var_773_cast_fp16 = concat(axis = var_772, interleave = var_773_interleave_0, values = (var_770_cast_fp16, var_768_cast_fp16_0))[name = string("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = mul(x = var_773_cast_fp16, y = sin_1_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_767_cast_fp16, y = var_774_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_756_cast_fp16)[name = string("transpose_11")]; + tensor var_777_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_778_split_sizes_0 = const()[name = string("op_778_split_sizes_0"), val = tensor([32, 32])]; + int32 var_778_axis_0 = const()[name = string("op_778_axis_0"), val = int32(-1)]; + tensor var_778_cast_fp16_0, tensor var_778_cast_fp16_1 = split(axis = var_778_axis_0, split_sizes = var_778_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_778_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_780_cast_fp16 = mul(x = var_778_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_780_cast_fp16")]; + int32 var_782 = const()[name = string("op_782"), val = int32(-1)]; + bool var_783_interleave_0 = const()[name = string("op_783_interleave_0"), val = bool(false)]; + tensor var_783_cast_fp16 = concat(axis = var_782, interleave = var_783_interleave_0, values = (var_780_cast_fp16, var_778_cast_fp16_0))[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_783_cast_fp16, y = sin_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor key_states_11_cast_fp16 = add(x = var_777_cast_fp16, y = var_784_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; + tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; + tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_246, concat_20_values3_0))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 
1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_14)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache)[name = string("coreml_update_state_28")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_763_cast_fp16)[name = string("transpose_10")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_15)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache)[name = string("coreml_update_state_29")]; + tensor var_827_begin_0 = const()[name = string("op_827_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_827_end_0 = const()[name = string("op_827_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_827_end_mask_0 = const()[name = string("op_827_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_827_cast_fp16 = slice_by_index(begin = var_827_begin_0, end = var_827_end_0, end_mask = var_827_end_mask_0, x = coreml_update_state_16)[name = string("op_827_cast_fp16")]; + tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; + int32 var_830_axis_0 = const()[name = string("op_830_axis_0"), val = int32(1)]; + tensor var_830_cast_fp16_0, tensor var_830_cast_fp16_1 = split(axis = 
var_830_axis_0, split_sizes = tile_4, x = var_827_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([3, 2, 2048, 64])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = coreml_update_state_17)[name = string("op_837_cast_fp16")]; + tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; + int32 var_840_axis_0 = const()[name = string("op_840_axis_0"), val = int32(1)]; + tensor var_840_cast_fp16_0, tensor var_840_cast_fp16_1 = split(axis = var_840_axis_0, split_sizes = tile_5, x = var_837_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_843_split_sizes_0 = const()[name = string("op_843_split_sizes_0"), val = tensor([8, 8])]; + int32 var_843_axis_0 = const()[name = string("op_843_axis_0"), val = int32(1)]; + tensor var_843_cast_fp16_0, tensor var_843_cast_fp16_1 = split(axis = var_843_axis_0, split_sizes = var_843_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_843_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_830_cast_fp16_0, y = var_843_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; + fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; + tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(2)]; + tensor attn_weights_39_cast_fp16 = softmax(axis = var_850, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; + bool var_856_transpose_x_1 = const()[name = string("op_856_transpose_x_1"), val = bool(true)]; + bool var_856_transpose_y_1 = const()[name = string("op_856_transpose_y_1"), val = bool(false)]; + tensor var_856_cast_fp16 = matmul(transpose_x = var_856_transpose_x_1, transpose_y = var_856_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_840_cast_fp16_0)[name = string("op_856_cast_fp16")]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_830_cast_fp16_1, y = var_843_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; + fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = 
string("_inversed_attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + int32 var_862 = const()[name = string("op_862"), val = int32(2)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_862, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; + bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_840_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; + int32 var_870 = const()[name = string("op_870"), val = int32(1)]; + bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; + tensor attn_output_15_cast_fp16 = concat(axis = var_870, interleave = attn_output_15_interleave_0, values = (var_856_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; + tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 1024, 1, 8])]; + tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_9")]; + tensor x_41_cast_fp16 = reshape(shape = var_879, x = var_874_cast_fp16)[name = string("x_41_cast_fp16")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; + tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_886_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_898 = const()[name = string("op_898"), val = int32(1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_901_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_901_cast_fp16")]; + bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; + tensor x_45_cast_fp16 = concat(axis = var_898, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_901_cast_fp16))[name = string("x_45_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_911_to_fp16 = const()[name = string("op_911_to_fp16"), val = fp16(0x1.5p-17)]; + 
tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_911_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; + tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_917_axis_0 = const()[name = string("op_917_axis_0"), val = int32(1)]; + tensor var_917_cast_fp16_0, tensor var_917_cast_fp16_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = out_33_cast_fp16)[name = string("op_917_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_922_to_fp16, x = var_917_cast_fp16_0)[name = string("input_5_cast_fp16")]; + tensor var_933_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_933_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = var_921_to_fp16, x = var_917_cast_fp16_0)[name = string("op_938_cast_fp16")]; + tensor x_51_cast_fp16 = mul(x = var_933_cast_fp16, y = var_938_cast_fp16)[name = string("x_51_cast_fp16")]; + string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; + tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; + tensor var_920_to_fp16 = const()[name = 
string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; + tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_920_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_951 = const()[name = string("op_951"), val = int32(1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_954_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_954_cast_fp16")]; + bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; + tensor x_55_cast_fp16 = concat(axis = var_951, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_954_cast_fp16))[name = string("x_55_cast_fp16")]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_964_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; + tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; + tensor var_970_split_sizes_0 = const()[name = string("op_970_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_970_axis_0 = const()[name = string("op_970_axis_0"), val = int32(1)]; + tensor var_970_cast_fp16_0, tensor var_970_cast_fp16_1 = split(axis = var_970_axis_0, split_sizes = var_970_split_sizes_0, x = out_39_cast_fp16)[name = string("op_970_cast_fp16")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; + tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_992_to_fp16, x = var_970_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; + tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1003_to_fp16, x = var_970_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor var_1014_to_fp16 = const()[name = string("op_1014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; + tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1014_to_fp16, x = var_970_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; + tensor var_1022 = const()[name = string("op_1022"), val = tensor([1, 16, 64, 8])]; + tensor embed_13_cast_fp16 = reshape(shape = var_1022, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 2, 64, 8])]; + tensor var_1027_cast_fp16 = reshape(shape = var_1026, x = key_states_13_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 2, 64, 8])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = value_states_13_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1038_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039_split_sizes_0 = const()[name = string("op_1039_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1039_axis_0 = const()[name = string("op_1039_axis_0"), val = int32(-2)]; + tensor var_1039_cast_fp16_0, tensor var_1039_cast_fp16_1 = split(axis = var_1039_axis_0, split_sizes = var_1039_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1039_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1041_cast_fp16 = mul(x = var_1039_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1041_cast_fp16")]; + int32 var_1043 = const()[name = string("op_1043"), val = int32(-2)]; + bool var_1044_interleave_0 = const()[name = string("op_1044_interleave_0"), val = bool(false)]; + tensor var_1044_cast_fp16 = concat(axis = var_1043, interleave = 
var_1044_interleave_0, values = (var_1041_cast_fp16, var_1039_cast_fp16_0))[name = string("op_1044_cast_fp16")]; + tensor var_1045_cast_fp16 = mul(x = var_1044_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1045_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1027_cast_fp16)[name = string("transpose_8")]; + tensor var_1048_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1049_split_sizes_0 = const()[name = string("op_1049_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1049_axis_0 = const()[name = string("op_1049_axis_0"), val = int32(-1)]; + tensor var_1049_cast_fp16_0, tensor var_1049_cast_fp16_1 = split(axis = var_1049_axis_0, split_sizes = var_1049_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1049_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1051_cast_fp16 = mul(x = var_1049_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1051_cast_fp16")]; + int32 var_1053 = const()[name = string("op_1053"), val = int32(-1)]; + bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; + tensor var_1054_cast_fp16 = concat(axis = var_1053, interleave = var_1054_interleave_0, values = (var_1051_cast_fp16, var_1049_cast_fp16_0))[name = string("op_1054_cast_fp16")]; + tensor var_1055_cast_fp16 = mul(x = var_1054_cast_fp16, y = sin_cast_fp16)[name = string("op_1055_cast_fp16")]; + tensor key_states_15_cast_fp16 = add(x = var_1048_cast_fp16, y = var_1055_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; + tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; + tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_246, concat_28_values3_0))[name = string("concat_28")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = 
string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_16)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache)[name = string("coreml_update_state_30")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1034_cast_fp16)[name = string("transpose_7")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_17)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache)[name = string("coreml_update_state_31")]; + tensor var_1098_begin_0 = const()[name = string("op_1098_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = string("op_1098_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1098_end_mask_0 = const()[name = string("op_1098_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = coreml_update_state_18)[name = string("op_1098_cast_fp16")]; + tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(1)]; + tensor var_1101_cast_fp16_0, tensor var_1101_cast_fp16_1 = split(axis = var_1101_axis_0, split_sizes = tile_6, x = var_1098_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_1108_end_0 = const()[name = 
string("op_1108_end_0"), val = tensor([4, 2, 2048, 64])]; + tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = coreml_update_state_19)[name = string("op_1108_cast_fp16")]; + tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(1)]; + tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = tile_7, x = var_1108_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor var_1114_split_sizes_0 = const()[name = string("op_1114_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1114_axis_0 = const()[name = string("op_1114_axis_0"), val = int32(1)]; + tensor var_1114_cast_fp16_0, tensor var_1114_cast_fp16_1 = split(axis = var_1114_axis_0, split_sizes = var_1114_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1114_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1101_cast_fp16_0, y = var_1114_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; + fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; + tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(2)]; + tensor attn_weights_55_cast_fp16 = softmax(axis = var_1121, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; + bool var_1127_transpose_x_1 = const()[name = string("op_1127_transpose_x_1"), val = bool(true)]; + bool var_1127_transpose_y_1 = const()[name = string("op_1127_transpose_y_1"), val = bool(false)]; + tensor var_1127_cast_fp16 = matmul(transpose_x = var_1127_transpose_x_1, transpose_y = var_1127_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1111_cast_fp16_0)[name = string("op_1127_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1101_cast_fp16_1, y = var_1114_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; + fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; + tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + 
int32 var_1133 = const()[name = string("op_1133"), val = int32(2)]; + tensor attn_weights_63_cast_fp16 = softmax(axis = var_1133, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; + bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; + bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1111_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; + int32 var_1141 = const()[name = string("op_1141"), val = int32(1)]; + bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = concat(axis = var_1141, interleave = attn_output_21_interleave_0, values = (var_1127_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1150 = const()[name = string("op_1150"), val = tensor([1, 1024, 1, 8])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_6")]; + tensor x_59_cast_fp16 = reshape(shape = var_1150, x = var_1145_cast_fp16)[name = string("x_59_cast_fp16")]; + string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; + tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; + tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1157_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; + int32 var_1169 = const()[name = string("op_1169"), val = int32(1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1172_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1172_cast_fp16")]; + bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; + tensor x_63_cast_fp16 = concat(axis = var_1169, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1172_cast_fp16))[name = string("x_63_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_1182_to_fp16 = const()[name = string("op_1182_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1182_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor 
layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; + tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; + tensor var_1188_split_sizes_0 = const()[name = string("op_1188_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1188_axis_0 = const()[name = string("op_1188_axis_0"), val = int32(1)]; + tensor var_1188_cast_fp16_0, tensor var_1188_cast_fp16_1 = split(axis = var_1188_axis_0, split_sizes = var_1188_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1188_cast_fp16")]; + string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; + tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; + int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; + tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1193_to_fp16, x = var_1188_cast_fp16_0)[name = string("input_7_cast_fp16")]; + tensor var_1204_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1204_cast_fp16")]; + string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; + tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; + tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; + int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; + tensor var_1209_cast_fp16 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = var_1192_to_fp16, x = var_1188_cast_fp16_0)[name = string("op_1209_cast_fp16")]; + tensor x_69_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1209_cast_fp16)[name = string("x_69_cast_fp16")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; 
+ tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1191_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; + int32 var_1222 = const()[name = string("op_1222"), val = int32(1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1225_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1225_cast_fp16")]; + bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; + tensor x_73_cast_fp16 = concat(axis = var_1222, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1225_cast_fp16))[name = string("x_73_cast_fp16")]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_1235_to_fp16 = const()[name = string("op_1235_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1235_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; + tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; + tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(1)]; + tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1241_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; + tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1263_to_fp16, x = var_1241_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; + string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; + tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; + tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 
1])]; + int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; + tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1274_to_fp16, x = var_1241_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; + tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1285_to_fp16, x = var_1241_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 16, 64, 8])]; + tensor embed_17_cast_fp16 = reshape(shape = var_1293, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 2, 64, 8])]; + tensor var_1298_cast_fp16 = reshape(shape = var_1297, x = key_states_17_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, 2, 64, 8])]; + tensor var_1305_cast_fp16 = reshape(shape = var_1304, x = value_states_17_cast_fp16)[name = string("op_1305_cast_fp16")]; + tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1309_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310_split_sizes_0 = const()[name = string("op_1310_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1310_axis_0 = const()[name = string("op_1310_axis_0"), val = int32(-2)]; + tensor var_1310_cast_fp16_0, tensor var_1310_cast_fp16_1 = split(axis = var_1310_axis_0, split_sizes = var_1310_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1310_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1312_cast_fp16 = mul(x = var_1310_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1312_cast_fp16")]; + int32 var_1314 = const()[name = string("op_1314"), val = int32(-2)]; + bool var_1315_interleave_0 = const()[name = string("op_1315_interleave_0"), val = bool(false)]; + tensor var_1315_cast_fp16 = concat(axis = var_1314, interleave = var_1315_interleave_0, values = (var_1312_cast_fp16, var_1310_cast_fp16_0))[name = string("op_1315_cast_fp16")]; + 
tensor var_1316_cast_fp16 = mul(x = var_1315_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_1309_cast_fp16, y = var_1316_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1298_cast_fp16)[name = string("transpose_5")]; + tensor var_1319_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor var_1320_split_sizes_0 = const()[name = string("op_1320_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1320_axis_0 = const()[name = string("op_1320_axis_0"), val = int32(-1)]; + tensor var_1320_cast_fp16_0, tensor var_1320_cast_fp16_1 = split(axis = var_1320_axis_0, split_sizes = var_1320_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1320_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1322_cast_fp16 = mul(x = var_1320_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 var_1324 = const()[name = string("op_1324"), val = int32(-1)]; + bool var_1325_interleave_0 = const()[name = string("op_1325_interleave_0"), val = bool(false)]; + tensor var_1325_cast_fp16 = concat(axis = var_1324, interleave = var_1325_interleave_0, values = (var_1322_cast_fp16, var_1320_cast_fp16_0))[name = string("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = mul(x = var_1325_cast_fp16, y = sin_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor key_states_19_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1326_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; + tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_246, concat_36_values3_0))[name = string("concat_36")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor 
key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_18)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache)[name = string("coreml_update_state_32")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1305_cast_fp16)[name = string("transpose_4")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_19)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache)[name = string("coreml_update_state_33")]; + tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1369_end_0 = const()[name = string("op_1369_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = coreml_update_state_20)[name = string("op_1369_cast_fp16")]; + tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; + int32 var_1372_axis_0 = const()[name = string("op_1372_axis_0"), val = int32(1)]; + tensor var_1372_cast_fp16_0, tensor var_1372_cast_fp16_1 = split(axis = var_1372_axis_0, split_sizes = tile_8, x = var_1369_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([5, 2, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = 
string("op_1379_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = coreml_update_state_21)[name = string("op_1379_cast_fp16")]; + tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; + int32 var_1382_axis_0 = const()[name = string("op_1382_axis_0"), val = int32(1)]; + tensor var_1382_cast_fp16_0, tensor var_1382_cast_fp16_1 = split(axis = var_1382_axis_0, split_sizes = tile_9, x = var_1379_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1385_split_sizes_0 = const()[name = string("op_1385_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1385_axis_0 = const()[name = string("op_1385_axis_0"), val = int32(1)]; + tensor var_1385_cast_fp16_0, tensor var_1385_cast_fp16_1 = split(axis = var_1385_axis_0, split_sizes = var_1385_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1385_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1372_cast_fp16_0, y = var_1385_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; + fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + int32 var_1392 = const()[name = string("op_1392"), val = int32(2)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_1392, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool var_1398_transpose_x_1 = const()[name = string("op_1398_transpose_x_1"), val = bool(true)]; + bool var_1398_transpose_y_1 = const()[name = string("op_1398_transpose_y_1"), val = bool(false)]; + tensor var_1398_cast_fp16 = matmul(transpose_x = var_1398_transpose_x_1, transpose_y = var_1398_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1382_cast_fp16_0)[name = string("op_1398_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1372_cast_fp16_1, y = var_1385_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; + fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; + tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + int32 var_1404 = const()[name = string("op_1404"), val = int32(2)]; + tensor attn_weights_79_cast_fp16 = 
softmax(axis = var_1404, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; + bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; + bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1382_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; + int32 var_1412 = const()[name = string("op_1412"), val = int32(1)]; + bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; + tensor attn_output_27_cast_fp16 = concat(axis = var_1412, interleave = attn_output_27_interleave_0, values = (var_1398_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; + tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1024, 1, 8])]; + tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_3")]; + tensor x_77_cast_fp16 = reshape(shape = var_1421, x = var_1416_cast_fp16)[name = string("x_77_cast_fp16")]; + string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; + tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; + tensor var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; + tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1428_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(1)]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1443_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1443_cast_fp16")]; + bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; + tensor x_81_cast_fp16 = concat(axis = var_1440, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1443_cast_fp16))[name = string("x_81_cast_fp16")]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1453_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = 
string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; + tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; + tensor var_1459_split_sizes_0 = const()[name = string("op_1459_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1459_axis_0 = const()[name = string("op_1459_axis_0"), val = int32(1)]; + tensor var_1459_cast_fp16_0, tensor var_1459_cast_fp16_1 = split(axis = var_1459_axis_0, split_sizes = var_1459_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1459_cast_fp16")]; + string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; + tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; + tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; + int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; + tensor var_1464_to_fp16 = const()[name = string("op_1464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; + tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1464_to_fp16, x = var_1459_cast_fp16_0)[name = string("input_9_cast_fp16")]; + tensor var_1475_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1475_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = var_1463_to_fp16, x = var_1459_cast_fp16_0)[name = string("op_1480_cast_fp16")]; + tensor x_87_cast_fp16 = mul(x = var_1475_cast_fp16, y = var_1480_cast_fp16)[name = string("x_87_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor var_1462_to_fp16 = const()[name = string("op_1462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; + tensor hidden_states_29_cast_fp16 = conv(dilations = 
hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1462_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_1493 = const()[name = string("op_1493"), val = int32(1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1496_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1496_cast_fp16")]; + bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; + tensor x_91_cast_fp16 = concat(axis = var_1493, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1496_cast_fp16))[name = string("x_91_cast_fp16")]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1506_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; + tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; + tensor var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(1)]; + tensor var_1512_cast_fp16_0, tensor var_1512_cast_fp16_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1512_cast_fp16")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor var_1534_to_fp16 = const()[name = string("op_1534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; + tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1534_to_fp16, x = var_1512_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = 
string("key_states_21_groups_0"), val = int32(1)]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; + tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1545_to_fp16, x = var_1512_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; + string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; + tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; + tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; + int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; + tensor var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; + tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1556_to_fp16, x = var_1512_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 16, 64, 8])]; + tensor embed_21_cast_fp16 = reshape(shape = var_1564, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; + tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 2, 64, 8])]; + tensor var_1569_cast_fp16 = reshape(shape = var_1568, x = key_states_21_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 2, 64, 8])]; + tensor var_1576_cast_fp16 = reshape(shape = var_1575, x = value_states_21_cast_fp16)[name = string("op_1576_cast_fp16")]; + tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1580_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1580_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-2)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1581_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1583_cast_fp16 = mul(x = var_1581_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1583_cast_fp16")]; + int32 var_1585 = const()[name = string("op_1585"), val = int32(-2)]; + bool var_1586_interleave_0 = const()[name = string("op_1586_interleave_0"), val = bool(false)]; + tensor var_1586_cast_fp16 = concat(axis = var_1585, interleave = var_1586_interleave_0, values = (var_1583_cast_fp16, var_1581_cast_fp16_0))[name = string("op_1586_cast_fp16")]; + tensor var_1587_cast_fp16 = mul(x = var_1586_cast_fp16, y = 
sin_1_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_1580_cast_fp16, y = var_1587_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_1569_cast_fp16)[name = string("transpose_2")]; + tensor var_1590_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor var_1591_split_sizes_0 = const()[name = string("op_1591_split_sizes_0"), val = tensor([32, 32])]; + int32 var_1591_axis_0 = const()[name = string("op_1591_axis_0"), val = int32(-1)]; + tensor var_1591_cast_fp16_0, tensor var_1591_cast_fp16_1 = split(axis = var_1591_axis_0, split_sizes = var_1591_split_sizes_0, x = embed_cast_fp16)[name = string("op_1591_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1593_cast_fp16 = mul(x = var_1591_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + int32 var_1595 = const()[name = string("op_1595"), val = int32(-1)]; + bool var_1596_interleave_0 = const()[name = string("op_1596_interleave_0"), val = bool(false)]; + tensor var_1596_cast_fp16 = concat(axis = var_1595, interleave = var_1596_interleave_0, values = (var_1593_cast_fp16, var_1591_cast_fp16_0))[name = string("op_1596_cast_fp16")]; + tensor var_1597_cast_fp16 = mul(x = var_1596_cast_fp16, y = sin_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor key_states_cast_fp16 = add(x = var_1590_cast_fp16, y = var_1597_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; + tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; + tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_246, concat_44_values3_0))[name = string("concat_44")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = 
string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_cast_fp16, x = coreml_update_state_20)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache)[name = string("coreml_update_state_34")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_1576_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_cast_fp16, x = coreml_update_state_21)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache)[name = string("coreml_update_state_35")]; + tensor var_1640_begin_0 = const()[name = string("op_1640_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1640_end_0 = const()[name = string("op_1640_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1640_end_mask_0 = const()[name = string("op_1640_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = coreml_update_state_22)[name = string("op_1640_cast_fp16")]; + tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; + int32 var_1643_axis_0 = const()[name = string("op_1643_axis_0"), val = int32(1)]; + tensor var_1643_cast_fp16_0, tensor var_1643_cast_fp16_1 = split(axis = var_1643_axis_0, split_sizes = tile_10, x = var_1640_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = string("op_1650_end_0"), val = tensor([1, 2, 2048, 64])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor 
var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = coreml_update_state_23)[name = string("op_1650_cast_fp16")]; + tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; + int32 var_1653_axis_0 = const()[name = string("op_1653_axis_0"), val = int32(1)]; + tensor var_1653_cast_fp16_0, tensor var_1653_cast_fp16_1 = split(axis = var_1653_axis_0, split_sizes = tile_11, x = var_1650_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1656_split_sizes_0 = const()[name = string("op_1656_split_sizes_0"), val = tensor([8, 8])]; + int32 var_1656_axis_0 = const()[name = string("op_1656_axis_0"), val = int32(1)]; + tensor var_1656_cast_fp16_0, tensor var_1656_cast_fp16_1 = split(axis = var_1656_axis_0, split_sizes = var_1656_split_sizes_0, x = query_states_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1643_cast_fp16_0, y = var_1656_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; + fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; + tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(2)]; + tensor attn_weights_87_cast_fp16 = softmax(axis = var_1663, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; + bool var_1669_transpose_x_1 = const()[name = string("op_1669_transpose_x_1"), val = bool(true)]; + bool var_1669_transpose_y_1 = const()[name = string("op_1669_transpose_y_1"), val = bool(false)]; + tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_1, transpose_y = var_1669_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1653_cast_fp16_0)[name = string("op_1669_cast_fp16")]; + bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; + bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; + tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1643_cast_fp16_1, y = var_1656_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; + fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; + tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; + int32 var_1675 = const()[name = string("op_1675"), val = int32(2)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_1675, x = attn_weights_93_cast_fp16)[name = 
string("attn_weights_cast_fp16")]; + bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; + bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_cast_fp16, y = var_1653_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; + int32 var_1683 = const()[name = string("op_1683"), val = int32(1)]; + bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; + tensor attn_output_33_cast_fp16 = concat(axis = var_1683, interleave = attn_output_33_interleave_0, values = (var_1669_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; + tensor var_1687_perm_0 = const()[name = string("op_1687_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 1024, 1, 8])]; + tensor var_1687_cast_fp16 = transpose(perm = var_1687_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_0")]; + tensor x_95_cast_fp16 = reshape(shape = var_1692, x = var_1687_cast_fp16)[name = string("x_95_cast_fp16")]; + string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; + tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; + tensor var_1699_to_fp16 = const()[name = string("op_1699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; + tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1699_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; + int32 var_1711 = const()[name = string("op_1711"), val = int32(1)]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1714_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1714_cast_fp16")]; + bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; + tensor x_99_cast_fp16 = concat(axis = var_1711, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1714_cast_fp16))[name = string("x_99_cast_fp16")]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1724_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; + tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; + tensor var_1730_split_sizes_0 = const()[name = string("op_1730_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1730_axis_0 = const()[name = string("op_1730_axis_0"), val = int32(1)]; + tensor var_1730_cast_fp16_0, tensor var_1730_cast_fp16_1 = split(axis = var_1730_axis_0, split_sizes = var_1730_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1730_cast_fp16")]; + string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; + tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; + tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; + int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; + tensor var_1735_to_fp16 = const()[name = string("op_1735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; + tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_1735_to_fp16, x = var_1730_cast_fp16_0)[name = string("input_cast_fp16")]; + tensor var_1746_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_1746_cast_fp16")]; + string var_1751_pad_type_0 = const()[name = string("op_1751_pad_type_0"), val = string("valid")]; + tensor var_1751_strides_0 = const()[name = string("op_1751_strides_0"), val = tensor([1, 1])]; + tensor var_1751_pad_0 = const()[name = string("op_1751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1751_dilations_0 = const()[name = string("op_1751_dilations_0"), val = tensor([1, 1])]; + int32 var_1751_groups_0 = const()[name = string("op_1751_groups_0"), val = int32(1)]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; + tensor var_1751_cast_fp16 = conv(dilations = var_1751_dilations_0, groups = var_1751_groups_0, pad = var_1751_pad_0, pad_type = var_1751_pad_type_0, strides = var_1751_strides_0, weight = var_1734_to_fp16, x = var_1730_cast_fp16_0)[name = string("op_1751_cast_fp16")]; + tensor x_105_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1751_cast_fp16)[name = string("x_105_cast_fp16")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; + tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = 
var_1733_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_107_cast_fp16")]; + int32 var_1764 = const()[name = string("op_1764"), val = int32(1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1767_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1767_cast_fp16")]; + bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; + tensor x_109_cast_fp16 = concat(axis = var_1764, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1767_cast_fp16))[name = string("x_109_cast_fp16")]; + tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; + fp16 var_1777_to_fp16 = const()[name = string("op_1777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1777_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; + tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; + tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_75_cast_fp16")]; + tensor var_1783_split_sizes_0 = const()[name = string("op_1783_split_sizes_0"), val = tensor([1024, 1024])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(1)]; + tensor output, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = var_1783_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1783_cast_fp16")]; + } -> (output); +} \ No newline at end of file