program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3405.2.1"}})] { func length_1(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { tensor var_260 = const()[name = string("op_260"), val = tensor([[0]])]; tensor position_ids_1 = add(x = var_260, y = position_id)[name = string("position_ids_1")]; int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; tensor var_285 = const()[name = string("op_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; tensor var_292_axes_0 = const()[name = string("op_292_axes_0"), val = tensor([2])]; tensor var_292 = expand_dims(axes = var_292_axes_0, x = position_ids_1)[name = string("op_292")]; tensor var_293 = greater(x = var_285, y = var_292)[name = string("op_293")]; tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_293_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_293)[name = string("cast_245")]; tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_293_to_fp16)[name = string("attention_mask_3_cast_fp16")]; fp16 var_301_promoted_to_fp16 = const()[name = string("op_301_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_302_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_301_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_303_after_broadcast_to_fp16 = const()[name = string("op_303_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8397056)))]; tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_303_after_broadcast_to_fp16, cond = var_302_cast_fp16)[name = string("attention_mask_cast_fp16")]; tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; int32 var_318 = const()[name = string("op_318"), val = int32(1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_321_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_321_cast_fp16")]; bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; tensor x_1_cast_fp16 = concat(axis = var_318, interleave = x_1_interleave_0, values = (inputs_embeds, var_321_cast_fp16))[name = string("x_1_cast_fp16")]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_331_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)]; tensor var_337_cast_fp16_0, tensor var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = out_3_cast_fp16)[name = string("op_337_cast_fp16")]; tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([1])]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor var_359_to_fp16 = const()[name = string("op_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_359_to_fp16, x = var_337_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor var_370_to_fp16 = const()[name = string("op_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_370_to_fp16, x = var_337_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_381_to_fp16, x = var_337_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 16, 64, 1])]; tensor embed_1_cast_fp16 = reshape(shape = var_389, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 2, 64, 1])]; tensor var_394_cast_fp16 = reshape(shape = var_393, x = key_states_1_cast_fp16)[name = string("op_394_cast_fp16")]; tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_400 = const()[name = string("op_400"), val = tensor([1, 2, 64, 1])]; tensor var_401_cast_fp16 = reshape(shape = var_400, x = value_states_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_74")]; tensor var_405_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_405_cast_fp16")]; tensor var_406_split_sizes_0 = const()[name = string("op_406_split_sizes_0"), val = tensor([32, 32])]; int32 var_406_axis_0 = const()[name = string("op_406_axis_0"), val = int32(-2)]; tensor var_406_cast_fp16_0, tensor var_406_cast_fp16_1 = split(axis = var_406_axis_0, split_sizes = var_406_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_406_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_408_cast_fp16 = mul(x = var_406_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_408_cast_fp16")]; int32 var_410 = const()[name = string("op_410"), val = int32(-2)]; bool var_411_interleave_0 = const()[name = string("op_411_interleave_0"), val = bool(false)]; tensor var_411_cast_fp16 = concat(axis = var_410, interleave = var_411_interleave_0, values = (var_408_cast_fp16, var_406_cast_fp16_0))[name = string("op_411_cast_fp16")]; tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_73")]; tensor var_412_cast_fp16 = mul(x = var_411_cast_fp16, y = sin_1_cast_fp16)[name = string("op_412_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_405_cast_fp16, y = var_412_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_394_cast_fp16)[name = string("transpose_71")]; tensor var_415_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_split_sizes_0 = const()[name = string("op_416_split_sizes_0"), val = tensor([32, 32])]; int32 var_416_axis_0 = const()[name = string("op_416_axis_0"), val = int32(-1)]; tensor var_416_cast_fp16_0, tensor var_416_cast_fp16_1 = split(axis = var_416_axis_0, split_sizes = var_416_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_416_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_418_cast_fp16 = mul(x = var_416_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_418_cast_fp16")]; int32 var_420 = const()[name = string("op_420"), val = int32(-1)]; bool var_421_interleave_0 = const()[name = string("op_421_interleave_0"), val = bool(false)]; tensor var_421_cast_fp16 = concat(axis = var_420, interleave = var_421_interleave_0, values = (var_418_cast_fp16, var_416_cast_fp16_0))[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = mul(x = var_421_cast_fp16, y = sin_cast_fp16)[name = string("op_422_cast_fp16")]; tensor key_states_3_cast_fp16 = add(x = var_415_cast_fp16, y = var_422_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_426 = add(x = position_id, y = q_len_1)[name = string("op_426")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_426, concat_4_values3_0))[name = string("concat_4")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_166_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_166")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_401_cast_fp16)[name = string("transpose_70")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_167_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_167")]; tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_48)[name = string("op_465_cast_fp16")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; int32 var_468_axis_0 = const()[name = string("op_468_axis_0"), val = int32(1)]; tensor var_468_cast_fp16_0, tensor var_468_cast_fp16_1 = split(axis = var_468_axis_0, split_sizes = tile_0, x = var_465_cast_fp16)[name = string("op_468_cast_fp16")]; tensor var_475_begin_0 = const()[name = string("op_475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_475_end_0 = const()[name = string("op_475_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_475_end_mask_0 = const()[name = string("op_475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_475_cast_fp16 = slice_by_index(begin = var_475_begin_0, end = var_475_end_0, end_mask = var_475_end_mask_0, x = coreml_update_state_49)[name = string("op_475_cast_fp16")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; int32 var_478_axis_0 = const()[name = string("op_478_axis_0"), val = int32(1)]; tensor var_478_cast_fp16_0, tensor var_478_cast_fp16_1 = split(axis = var_478_axis_0, split_sizes = tile_1, x = var_475_cast_fp16)[name = string("op_478_cast_fp16")]; tensor var_481_split_sizes_0 = const()[name = string("op_481_split_sizes_0"), val = tensor([8, 8])]; int32 var_481_axis_0 = const()[name = string("op_481_axis_0"), val = int32(1)]; tensor var_481_cast_fp16_0, tensor var_481_cast_fp16_1 = split(axis = var_481_axis_0, split_sizes = var_481_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_481_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_468_cast_fp16_0, y = var_481_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_72")]; tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; int32 var_488 = const()[name = string("op_488"), val = int32(2)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_488, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool var_494_transpose_x_1 = const()[name = string("op_494_transpose_x_1"), val = bool(true)]; bool var_494_transpose_y_1 = const()[name = string("op_494_transpose_y_1"), val = bool(false)]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_1, transpose_y = var_494_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_478_cast_fp16_0)[name = string("op_494_cast_fp16")]; bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_468_cast_fp16_1, y = var_481_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; int32 var_500 = const()[name = string("op_500"), val = int32(2)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_500, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_478_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; int32 var_508 = const()[name = string("op_508"), val = int32(1)]; bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; tensor attn_output_3_cast_fp16 = concat(axis = var_508, interleave = attn_output_3_interleave_0, values = (var_494_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; tensor var_512_perm_0 = const()[name = string("op_512_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 1024, 1, 1])]; tensor var_512_cast_fp16 = transpose(perm = var_512_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_69")]; tensor x_5_cast_fp16 = reshape(shape = var_517, x = var_512_cast_fp16)[name = string("x_5_cast_fp16")]; string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_524_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; int32 var_536 = const()[name = string("op_536"), val = int32(1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_539_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_539_cast_fp16")]; bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; tensor x_9_cast_fp16 = concat(axis = var_536, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_539_cast_fp16))[name = string("x_9_cast_fp16")]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_549_to_fp16 = const()[name = string("op_549_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_549_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(1)]; tensor var_555_cast_fp16_0, tensor var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = out_9_cast_fp16)[name = string("op_555_cast_fp16")]; string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_560_to_fp16, x = var_555_cast_fp16_0)[name = string("input_1_cast_fp16")]; tensor var_571_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_571_cast_fp16")]; string var_576_pad_type_0 = const()[name = string("op_576_pad_type_0"), val = string("valid")]; tensor var_576_strides_0 = const()[name = string("op_576_strides_0"), val = tensor([1, 1])]; tensor var_576_pad_0 = const()[name = string("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_dilations_0 = const()[name = string("op_576_dilations_0"), val = tensor([1, 1])]; int32 var_576_groups_0 = const()[name = string("op_576_groups_0"), val = int32(1)]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; tensor var_576_cast_fp16 = conv(dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = var_559_to_fp16, x = var_555_cast_fp16_0)[name = string("op_576_cast_fp16")]; tensor x_15_cast_fp16 = mul(x = var_571_cast_fp16, y = var_576_cast_fp16)[name = string("x_15_cast_fp16")]; string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; tensor var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_558_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_589 = const()[name = string("op_589"), val = int32(1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_592_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_592_cast_fp16")]; bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; tensor x_19_cast_fp16 = concat(axis = var_589, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_592_cast_fp16))[name = string("x_19_cast_fp16")]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_602_to_fp16 = const()[name = string("op_602_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_602_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_608_split_sizes_0 = const()[name = string("op_608_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_608_axis_0 = const()[name = string("op_608_axis_0"), val = int32(1)]; tensor var_608_cast_fp16_0, tensor var_608_cast_fp16_1 = split(axis = var_608_axis_0, split_sizes = var_608_split_sizes_0, x = out_15_cast_fp16)[name = string("op_608_cast_fp16")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_630_to_fp16, x = var_608_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_641_to_fp16, x = var_608_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_652_to_fp16, x = var_608_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 64, 1])]; tensor embed_5_cast_fp16 = reshape(shape = var_660, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 2, 64, 1])]; tensor var_665_cast_fp16 = reshape(shape = var_664, x = key_states_5_cast_fp16)[name = string("op_665_cast_fp16")]; tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 2, 64, 1])]; tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_states_5_cast_fp16)[name = string("op_672_cast_fp16")]; tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_676_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_677_split_sizes_0 = const()[name = string("op_677_split_sizes_0"), val = tensor([32, 32])]; int32 var_677_axis_0 = const()[name = string("op_677_axis_0"), val = int32(-2)]; tensor var_677_cast_fp16_0, tensor var_677_cast_fp16_1 = split(axis = var_677_axis_0, split_sizes = var_677_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_677_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_679_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-2)]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682_cast_fp16 = concat(axis = var_681, interleave = var_682_interleave_0, values = (var_679_cast_fp16, var_677_cast_fp16_0))[name = string("op_682_cast_fp16")]; tensor var_683_cast_fp16 = mul(x = var_682_cast_fp16, y = sin_1_cast_fp16)[name = string("op_683_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_676_cast_fp16, y = var_683_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_665_cast_fp16)[name = string("transpose_68")]; tensor var_686_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_686_cast_fp16")]; tensor var_687_split_sizes_0 = const()[name = string("op_687_split_sizes_0"), val = tensor([32, 32])]; int32 var_687_axis_0 = const()[name = string("op_687_axis_0"), val = int32(-1)]; tensor var_687_cast_fp16_0, tensor var_687_cast_fp16_1 = split(axis = var_687_axis_0, split_sizes = var_687_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_687_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_689_cast_fp16 = mul(x = var_687_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_689_cast_fp16")]; int32 var_691 = const()[name = string("op_691"), val = int32(-1)]; bool var_692_interleave_0 = const()[name = string("op_692_interleave_0"), val = bool(false)]; tensor var_692_cast_fp16 = concat(axis = var_691, interleave = var_692_interleave_0, values = (var_689_cast_fp16, var_687_cast_fp16_0))[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = mul(x = var_692_cast_fp16, y = sin_cast_fp16)[name = string("op_693_cast_fp16")]; tensor key_states_7_cast_fp16 = add(x = var_686_cast_fp16, y = var_693_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_426, concat_12_values3_0))[name = string("concat_12")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_168_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_168")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_672_cast_fp16)[name = string("transpose_67")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_169_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_169")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_50)[name = string("op_736_cast_fp16")]; tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; int32 var_739_axis_0 = const()[name = string("op_739_axis_0"), val = int32(1)]; tensor var_739_cast_fp16_0, tensor var_739_cast_fp16_1 = split(axis = var_739_axis_0, split_sizes = tile_2, x = var_736_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = coreml_update_state_51)[name = string("op_746_cast_fp16")]; tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = tile_3, x = var_746_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_752_split_sizes_0 = const()[name = string("op_752_split_sizes_0"), val = tensor([8, 8])]; int32 var_752_axis_0 = const()[name = string("op_752_axis_0"), val = int32(1)]; tensor var_752_cast_fp16_0, tensor var_752_cast_fp16_1 = split(axis = var_752_axis_0, split_sizes = var_752_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_752_cast_fp16")]; bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_739_cast_fp16_0, y = var_752_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; int32 var_759 = const()[name = string("op_759"), val = int32(2)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_759, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool var_765_transpose_x_1 = const()[name = string("op_765_transpose_x_1"), val = bool(true)]; bool var_765_transpose_y_1 = const()[name = string("op_765_transpose_y_1"), val = bool(false)]; tensor var_765_cast_fp16 = matmul(transpose_x = var_765_transpose_x_1, transpose_y = var_765_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_749_cast_fp16_0)[name = string("op_765_cast_fp16")]; bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_739_cast_fp16_1, y = var_752_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; int32 var_771 = const()[name = string("op_771"), val = int32(2)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_771, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_749_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; int32 var_779 = const()[name = string("op_779"), val = int32(1)]; bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = concat(axis = var_779, interleave = attn_output_9_interleave_0, values = (var_765_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; tensor var_783_perm_0 = const()[name = string("op_783_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1024, 1, 1])]; tensor var_783_cast_fp16 = transpose(perm = var_783_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_66")]; tensor x_23_cast_fp16 = reshape(shape = var_788, x = var_783_cast_fp16)[name = string("x_23_cast_fp16")]; string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_795_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_807 = const()[name = string("op_807"), val = int32(1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; tensor x_27_cast_fp16 = concat(axis = var_807, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_810_cast_fp16))[name = string("x_27_cast_fp16")]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_820_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_826_split_sizes_0 = const()[name = string("op_826_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_826_axis_0 = const()[name = string("op_826_axis_0"), val = int32(1)]; tensor var_826_cast_fp16_0, tensor var_826_cast_fp16_1 = split(axis = var_826_axis_0, split_sizes = var_826_split_sizes_0, x = out_21_cast_fp16)[name = string("op_826_cast_fp16")]; string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_831_to_fp16, x = var_826_cast_fp16_0)[name = string("input_3_cast_fp16")]; tensor var_842_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_842_cast_fp16")]; string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")]; tensor var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor([1, 1])]; tensor var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor([1, 1])]; int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)]; tensor var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; tensor var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = var_830_to_fp16, x = var_826_cast_fp16_0)[name = string("op_847_cast_fp16")]; tensor x_33_cast_fp16 = mul(x = var_842_cast_fp16, y = var_847_cast_fp16)[name = string("x_33_cast_fp16")]; string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_829_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; int32 var_860 = const()[name = string("op_860"), val = int32(1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_863_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_863_cast_fp16")]; bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; tensor x_37_cast_fp16 = concat(axis = var_860, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_863_cast_fp16))[name = string("x_37_cast_fp16")]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_873_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(1)]; tensor var_879_cast_fp16_0, tensor var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = out_27_cast_fp16)[name = string("op_879_cast_fp16")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_901_to_fp16, x = var_879_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_912_to_fp16, x = var_879_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_923_to_fp16, x = var_879_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 16, 64, 1])]; tensor embed_9_cast_fp16 = reshape(shape = var_931, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; tensor var_935 = const()[name = string("op_935"), val = tensor([1, 2, 64, 1])]; tensor var_936_cast_fp16 = reshape(shape = var_935, x = key_states_9_cast_fp16)[name = string("op_936_cast_fp16")]; tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 2, 64, 1])]; tensor var_943_cast_fp16 = reshape(shape = var_942, x = value_states_9_cast_fp16)[name = string("op_943_cast_fp16")]; tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_947_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_947_cast_fp16")]; tensor var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor([32, 32])]; int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-2)]; tensor var_948_cast_fp16_0, tensor var_948_cast_fp16_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_948_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_950_cast_fp16 = mul(x = var_948_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_950_cast_fp16")]; int32 var_952 = const()[name = string("op_952"), val = int32(-2)]; bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)]; tensor var_953_cast_fp16 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950_cast_fp16, var_948_cast_fp16_0))[name = string("op_953_cast_fp16")]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = sin_1_cast_fp16)[name = string("op_954_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_936_cast_fp16)[name = string("transpose_65")]; tensor var_957_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_957_cast_fp16")]; tensor var_958_split_sizes_0 = const()[name = string("op_958_split_sizes_0"), val = tensor([32, 32])]; int32 var_958_axis_0 = const()[name = string("op_958_axis_0"), val = int32(-1)]; tensor var_958_cast_fp16_0, tensor var_958_cast_fp16_1 = split(axis = var_958_axis_0, split_sizes = var_958_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_958_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = var_958_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool var_963_interleave_0 = const()[name = string("op_963_interleave_0"), val = bool(false)]; tensor var_963_cast_fp16 = concat(axis = var_962, interleave = var_963_interleave_0, values = (var_960_cast_fp16, var_958_cast_fp16_0))[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = var_963_cast_fp16, y = sin_cast_fp16)[name = string("op_964_cast_fp16")]; tensor key_states_11_cast_fp16 = add(x = var_957_cast_fp16, y = var_964_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_426, concat_20_values3_0))[name = string("concat_20")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_170_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_170")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_943_cast_fp16)[name = string("transpose_64")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_171_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_171")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_52)[name = string("op_1007_cast_fp16")]; tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; int32 var_1010_axis_0 = const()[name = string("op_1010_axis_0"), val = int32(1)]; tensor var_1010_cast_fp16_0, tensor var_1010_cast_fp16_1 = split(axis = var_1010_axis_0, split_sizes = tile_4, x = var_1007_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = string("op_1017_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1017_end_0 = const()[name = string("op_1017_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1017_end_mask_0 = const()[name = string("op_1017_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = coreml_update_state_53)[name = string("op_1017_cast_fp16")]; tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; int32 var_1020_axis_0 = const()[name = string("op_1020_axis_0"), val = int32(1)]; tensor var_1020_cast_fp16_0, tensor var_1020_cast_fp16_1 = split(axis = var_1020_axis_0, split_sizes = tile_5, x = var_1017_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1023_split_sizes_0 = const()[name = string("op_1023_split_sizes_0"), val = tensor([8, 8])]; int32 var_1023_axis_0 = const()[name = string("op_1023_axis_0"), val = int32(1)]; tensor var_1023_cast_fp16_0, tensor var_1023_cast_fp16_1 = split(axis = var_1023_axis_0, split_sizes = var_1023_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_1023_cast_fp16")]; bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_1010_cast_fp16_0, y = var_1023_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; int32 var_1030 = const()[name = string("op_1030"), val = int32(2)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_1030, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool var_1036_transpose_x_1 = const()[name = string("op_1036_transpose_x_1"), val = bool(true)]; bool var_1036_transpose_y_1 = const()[name = string("op_1036_transpose_y_1"), val = bool(false)]; tensor var_1036_cast_fp16 = matmul(transpose_x = var_1036_transpose_x_1, transpose_y = var_1036_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_1020_cast_fp16_0)[name = string("op_1036_cast_fp16")]; bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_1010_cast_fp16_1, y = var_1023_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; int32 var_1042 = const()[name = string("op_1042"), val = int32(2)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_1042, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_1020_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; int32 var_1050 = const()[name = string("op_1050"), val = int32(1)]; bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; tensor attn_output_15_cast_fp16 = concat(axis = var_1050, interleave = attn_output_15_interleave_0, values = (var_1036_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; tensor var_1054_perm_0 = const()[name = string("op_1054_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1059 = const()[name = string("op_1059"), val = tensor([1, 1024, 1, 1])]; tensor var_1054_cast_fp16 = transpose(perm = var_1054_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_63")]; tensor x_41_cast_fp16 = reshape(shape = var_1059, x = var_1054_cast_fp16)[name = string("x_41_cast_fp16")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_1066_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_1078 = const()[name = string("op_1078"), val = int32(1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1081_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_1081_cast_fp16")]; bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; tensor x_45_cast_fp16 = concat(axis = var_1078, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_1081_cast_fp16))[name = string("x_45_cast_fp16")]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1091_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(1)]; tensor var_1097_cast_fp16_0, tensor var_1097_cast_fp16_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = out_33_cast_fp16)[name = string("op_1097_cast_fp16")]; string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_1102_to_fp16, x = var_1097_cast_fp16_0)[name = string("input_5_cast_fp16")]; tensor var_1113_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_1113_cast_fp16")]; string var_1118_pad_type_0 = const()[name = string("op_1118_pad_type_0"), val = string("valid")]; tensor var_1118_strides_0 = const()[name = string("op_1118_strides_0"), val = tensor([1, 1])]; tensor var_1118_pad_0 = const()[name = string("op_1118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1118_dilations_0 = const()[name = string("op_1118_dilations_0"), val = tensor([1, 1])]; int32 var_1118_groups_0 = const()[name = string("op_1118_groups_0"), val = int32(1)]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; tensor var_1118_cast_fp16 = conv(dilations = var_1118_dilations_0, groups = var_1118_groups_0, pad = var_1118_pad_0, pad_type = var_1118_pad_type_0, strides = var_1118_strides_0, weight = var_1101_to_fp16, x = var_1097_cast_fp16_0)[name = string("op_1118_cast_fp16")]; tensor x_51_cast_fp16 = mul(x = var_1113_cast_fp16, y = var_1118_cast_fp16)[name = string("x_51_cast_fp16")]; string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; tensor var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_1100_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(1)]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1134_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1134_cast_fp16")]; bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; tensor x_55_cast_fp16 = concat(axis = var_1131, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_1134_cast_fp16))[name = string("x_55_cast_fp16")]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1144_to_fp16 = const()[name = string("op_1144_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1144_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_1150_split_sizes_0 = const()[name = string("op_1150_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1150_axis_0 = const()[name = string("op_1150_axis_0"), val = int32(1)]; tensor var_1150_cast_fp16_0, tensor var_1150_cast_fp16_1 = split(axis = var_1150_axis_0, split_sizes = var_1150_split_sizes_0, x = out_39_cast_fp16)[name = string("op_1150_cast_fp16")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_1172_to_fp16, x = var_1150_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1183_to_fp16, x = var_1150_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1194_to_fp16, x = var_1150_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 16, 64, 1])]; tensor embed_13_cast_fp16 = reshape(shape = var_1202, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 2, 64, 1])]; tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = key_states_13_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, 2, 64, 1])]; tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = value_states_13_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1218_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1218_cast_fp16")]; tensor var_1219_split_sizes_0 = const()[name = string("op_1219_split_sizes_0"), val = tensor([32, 32])]; int32 var_1219_axis_0 = const()[name = string("op_1219_axis_0"), val = int32(-2)]; tensor var_1219_cast_fp16_0, tensor var_1219_cast_fp16_1 = split(axis = var_1219_axis_0, split_sizes = var_1219_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1219_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1221_cast_fp16 = mul(x = var_1219_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1221_cast_fp16")]; int32 var_1223 = const()[name = string("op_1223"), val = int32(-2)]; bool var_1224_interleave_0 = const()[name = string("op_1224_interleave_0"), val = bool(false)]; tensor var_1224_cast_fp16 = concat(axis = var_1223, interleave = var_1224_interleave_0, values = (var_1221_cast_fp16, var_1219_cast_fp16_0))[name = string("op_1224_cast_fp16")]; tensor var_1225_cast_fp16 = mul(x = var_1224_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1225_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1207_cast_fp16)[name = string("transpose_62")]; tensor var_1228_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor var_1229_split_sizes_0 = const()[name = string("op_1229_split_sizes_0"), val = tensor([32, 32])]; int32 var_1229_axis_0 = const()[name = string("op_1229_axis_0"), val = int32(-1)]; tensor var_1229_cast_fp16_0, tensor var_1229_cast_fp16_1 = split(axis = var_1229_axis_0, split_sizes = var_1229_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1229_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1231_cast_fp16 = mul(x = var_1229_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1231_cast_fp16")]; int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; bool var_1234_interleave_0 = const()[name = string("op_1234_interleave_0"), val = bool(false)]; tensor var_1234_cast_fp16 = concat(axis = var_1233, interleave = var_1234_interleave_0, values = (var_1231_cast_fp16, var_1229_cast_fp16_0))[name = string("op_1234_cast_fp16")]; tensor var_1235_cast_fp16 = mul(x = var_1234_cast_fp16, y = sin_cast_fp16)[name = string("op_1235_cast_fp16")]; tensor key_states_15_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1235_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_426, concat_28_values3_0))[name = string("concat_28")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_172_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_172")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1214_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_173_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_173")]; tensor var_1278_begin_0 = const()[name = string("op_1278_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1278_end_0 = const()[name = string("op_1278_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1278_end_mask_0 = const()[name = string("op_1278_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1278_cast_fp16 = slice_by_index(begin = var_1278_begin_0, end = var_1278_end_0, end_mask = var_1278_end_mask_0, x = coreml_update_state_54)[name = string("op_1278_cast_fp16")]; tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; int32 var_1281_axis_0 = const()[name = string("op_1281_axis_0"), val = int32(1)]; tensor var_1281_cast_fp16_0, tensor var_1281_cast_fp16_1 = split(axis = var_1281_axis_0, split_sizes = tile_6, x = var_1278_cast_fp16)[name = string("op_1281_cast_fp16")]; tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = coreml_update_state_55)[name = string("op_1288_cast_fp16")]; tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; int32 var_1291_axis_0 = const()[name = string("op_1291_axis_0"), val = int32(1)]; tensor var_1291_cast_fp16_0, tensor var_1291_cast_fp16_1 = split(axis = var_1291_axis_0, split_sizes = tile_7, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_split_sizes_0 = const()[name = string("op_1294_split_sizes_0"), val = tensor([8, 8])]; int32 var_1294_axis_0 = const()[name = string("op_1294_axis_0"), val = int32(1)]; tensor var_1294_cast_fp16_0, tensor var_1294_cast_fp16_1 = split(axis = var_1294_axis_0, split_sizes = var_1294_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1294_cast_fp16")]; bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1281_cast_fp16_0, y = var_1294_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; int32 var_1301 = const()[name = string("op_1301"), val = int32(2)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_1301, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool var_1307_transpose_x_1 = const()[name = string("op_1307_transpose_x_1"), val = bool(true)]; bool var_1307_transpose_y_1 = const()[name = string("op_1307_transpose_y_1"), val = bool(false)]; tensor var_1307_cast_fp16 = matmul(transpose_x = var_1307_transpose_x_1, transpose_y = var_1307_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1291_cast_fp16_0)[name = string("op_1307_cast_fp16")]; bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1281_cast_fp16_1, y = var_1294_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; int32 var_1313 = const()[name = string("op_1313"), val = int32(2)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_1313, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1291_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; int32 var_1321 = const()[name = string("op_1321"), val = int32(1)]; bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = concat(axis = var_1321, interleave = attn_output_21_interleave_0, values = (var_1307_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; tensor var_1325_perm_0 = const()[name = string("op_1325_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 1024, 1, 1])]; tensor var_1325_cast_fp16 = transpose(perm = var_1325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_60")]; tensor x_59_cast_fp16 = reshape(shape = var_1330, x = var_1325_cast_fp16)[name = string("x_59_cast_fp16")]; string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1337_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; int32 var_1349 = const()[name = string("op_1349"), val = int32(1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1352_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1352_cast_fp16")]; bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; tensor x_63_cast_fp16 = concat(axis = var_1349, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1352_cast_fp16))[name = string("x_63_cast_fp16")]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1362_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1368_split_sizes_0 = const()[name = string("op_1368_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1368_axis_0 = const()[name = string("op_1368_axis_0"), val = int32(1)]; tensor var_1368_cast_fp16_0, tensor var_1368_cast_fp16_1 = split(axis = var_1368_axis_0, split_sizes = var_1368_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1368_cast_fp16")]; string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; tensor var_1373_to_fp16 = const()[name = string("op_1373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1373_to_fp16, x = var_1368_cast_fp16_0)[name = string("input_7_cast_fp16")]; tensor var_1384_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1384_cast_fp16")]; string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")]; tensor var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor([1, 1])]; tensor var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor([1, 1])]; int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)]; tensor var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; tensor var_1389_cast_fp16 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = var_1372_to_fp16, x = var_1368_cast_fp16_0)[name = string("op_1389_cast_fp16")]; tensor x_69_cast_fp16 = mul(x = var_1384_cast_fp16, y = var_1389_cast_fp16)[name = string("x_69_cast_fp16")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor var_1371_to_fp16 = const()[name = string("op_1371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1371_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; int32 var_1402 = const()[name = string("op_1402"), val = int32(1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1405_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1405_cast_fp16")]; bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; tensor x_73_cast_fp16 = concat(axis = var_1402, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1405_cast_fp16))[name = string("x_73_cast_fp16")]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1415_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_1421_split_sizes_0 = const()[name = string("op_1421_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1421_axis_0 = const()[name = string("op_1421_axis_0"), val = int32(1)]; tensor var_1421_cast_fp16_0, tensor var_1421_cast_fp16_1 = split(axis = var_1421_axis_0, split_sizes = var_1421_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1421_cast_fp16")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1443_to_fp16, x = var_1421_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; tensor var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1454_to_fp16, x = var_1421_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1465_to_fp16, x = var_1421_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; tensor var_1473 = const()[name = string("op_1473"), val = tensor([1, 16, 64, 1])]; tensor embed_17_cast_fp16 = reshape(shape = var_1473, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; tensor var_1477 = const()[name = string("op_1477"), val = tensor([1, 2, 64, 1])]; tensor var_1478_cast_fp16 = reshape(shape = var_1477, x = key_states_17_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1484 = const()[name = string("op_1484"), val = tensor([1, 2, 64, 1])]; tensor var_1485_cast_fp16 = reshape(shape = var_1484, x = value_states_17_cast_fp16)[name = string("op_1485_cast_fp16")]; tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1489_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor var_1490_split_sizes_0 = const()[name = string("op_1490_split_sizes_0"), val = tensor([32, 32])]; int32 var_1490_axis_0 = const()[name = string("op_1490_axis_0"), val = int32(-2)]; tensor var_1490_cast_fp16_0, tensor var_1490_cast_fp16_1 = split(axis = var_1490_axis_0, split_sizes = var_1490_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1490_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1492_cast_fp16 = mul(x = var_1490_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-2)]; bool var_1495_interleave_0 = const()[name = string("op_1495_interleave_0"), val = bool(false)]; tensor var_1495_cast_fp16 = concat(axis = var_1494, interleave = var_1495_interleave_0, values = (var_1492_cast_fp16, var_1490_cast_fp16_0))[name = string("op_1495_cast_fp16")]; tensor var_1496_cast_fp16 = mul(x = var_1495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1496_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1478_cast_fp16)[name = string("transpose_59")]; tensor var_1499_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_split_sizes_0 = const()[name = string("op_1500_split_sizes_0"), val = tensor([32, 32])]; int32 var_1500_axis_0 = const()[name = string("op_1500_axis_0"), val = int32(-1)]; tensor var_1500_cast_fp16_0, tensor var_1500_cast_fp16_1 = split(axis = var_1500_axis_0, split_sizes = var_1500_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1500_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1502_cast_fp16 = mul(x = var_1500_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1502_cast_fp16")]; int32 var_1504 = const()[name = string("op_1504"), val = int32(-1)]; bool var_1505_interleave_0 = const()[name = string("op_1505_interleave_0"), val = bool(false)]; tensor var_1505_cast_fp16 = concat(axis = var_1504, interleave = var_1505_interleave_0, values = (var_1502_cast_fp16, var_1500_cast_fp16_0))[name = string("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = mul(x = var_1505_cast_fp16, y = sin_cast_fp16)[name = string("op_1506_cast_fp16")]; tensor key_states_19_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1506_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_426, concat_36_values3_0))[name = string("concat_36")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_174_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_174")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1485_cast_fp16)[name = string("transpose_58")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_175_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_175")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1549_end_0 = const()[name = string("op_1549_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = coreml_update_state_56)[name = string("op_1549_cast_fp16")]; tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; int32 var_1552_axis_0 = const()[name = string("op_1552_axis_0"), val = int32(1)]; tensor var_1552_cast_fp16_0, tensor var_1552_cast_fp16_1 = split(axis = var_1552_axis_0, split_sizes = tile_8, x = var_1549_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = string("op_1559_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, x = coreml_update_state_57)[name = string("op_1559_cast_fp16")]; tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(1)]; tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = tile_9, x = var_1559_cast_fp16)[name = string("op_1562_cast_fp16")]; tensor var_1565_split_sizes_0 = const()[name = string("op_1565_split_sizes_0"), val = tensor([8, 8])]; int32 var_1565_axis_0 = const()[name = string("op_1565_axis_0"), val = int32(1)]; tensor var_1565_cast_fp16_0, tensor var_1565_cast_fp16_1 = split(axis = var_1565_axis_0, split_sizes = var_1565_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1565_cast_fp16")]; bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1552_cast_fp16_0, y = var_1565_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; int32 var_1572 = const()[name = string("op_1572"), val = int32(2)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_1572, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(true)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(false)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1562_cast_fp16_0)[name = string("op_1578_cast_fp16")]; bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1552_cast_fp16_1, y = var_1565_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; int32 var_1584 = const()[name = string("op_1584"), val = int32(2)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_1584, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1562_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; int32 var_1592 = const()[name = string("op_1592"), val = int32(1)]; bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; tensor attn_output_27_cast_fp16 = concat(axis = var_1592, interleave = attn_output_27_interleave_0, values = (var_1578_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1024, 1, 1])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_57")]; tensor x_77_cast_fp16 = reshape(shape = var_1601, x = var_1596_cast_fp16)[name = string("x_77_cast_fp16")]; string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1608_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; int32 var_1620 = const()[name = string("op_1620"), val = int32(1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1623_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1623_cast_fp16")]; bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; tensor x_81_cast_fp16 = concat(axis = var_1620, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1623_cast_fp16))[name = string("x_81_cast_fp16")]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1633_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1639_split_sizes_0 = const()[name = string("op_1639_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1639_axis_0 = const()[name = string("op_1639_axis_0"), val = int32(1)]; tensor var_1639_cast_fp16_0, tensor var_1639_cast_fp16_1 = split(axis = var_1639_axis_0, split_sizes = var_1639_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1639_cast_fp16")]; string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1644_to_fp16, x = var_1639_cast_fp16_0)[name = string("input_9_cast_fp16")]; tensor var_1655_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1655_cast_fp16")]; string var_1660_pad_type_0 = const()[name = string("op_1660_pad_type_0"), val = string("valid")]; tensor var_1660_strides_0 = const()[name = string("op_1660_strides_0"), val = tensor([1, 1])]; tensor var_1660_pad_0 = const()[name = string("op_1660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1660_dilations_0 = const()[name = string("op_1660_dilations_0"), val = tensor([1, 1])]; int32 var_1660_groups_0 = const()[name = string("op_1660_groups_0"), val = int32(1)]; tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; tensor var_1660_cast_fp16 = conv(dilations = var_1660_dilations_0, groups = var_1660_groups_0, pad = var_1660_pad_0, pad_type = var_1660_pad_type_0, strides = var_1660_strides_0, weight = var_1643_to_fp16, x = var_1639_cast_fp16_0)[name = string("op_1660_cast_fp16")]; tensor x_87_cast_fp16 = mul(x = var_1655_cast_fp16, y = var_1660_cast_fp16)[name = string("x_87_cast_fp16")]; string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; tensor var_1642_to_fp16 = const()[name = string("op_1642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1642_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; int32 var_1673 = const()[name = string("op_1673"), val = int32(1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1676_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1676_cast_fp16")]; bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; tensor x_91_cast_fp16 = concat(axis = var_1673, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1676_cast_fp16))[name = string("x_91_cast_fp16")]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1686_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_1692_split_sizes_0 = const()[name = string("op_1692_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1692_axis_0 = const()[name = string("op_1692_axis_0"), val = int32(1)]; tensor var_1692_cast_fp16_0, tensor var_1692_cast_fp16_1 = split(axis = var_1692_axis_0, split_sizes = var_1692_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1692_cast_fp16")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor var_1714_to_fp16 = const()[name = string("op_1714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1714_to_fp16, x = var_1692_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1725_to_fp16, x = var_1692_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; tensor var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1736_to_fp16, x = var_1692_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; tensor var_1744 = const()[name = string("op_1744"), val = tensor([1, 16, 64, 1])]; tensor embed_21_cast_fp16 = reshape(shape = var_1744, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 2, 64, 1])]; tensor var_1749_cast_fp16 = reshape(shape = var_1748, x = key_states_21_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor embed_23_perm_0 = const()[name = string("embed_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 2, 64, 1])]; tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_states_21_cast_fp16)[name = string("op_1756_cast_fp16")]; tensor value_states_23_perm_0 = const()[name = string("value_states_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1760_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([32, 32])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-2)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1761_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1763_cast_fp16 = mul(x = var_1761_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1763_cast_fp16")]; int32 var_1765 = const()[name = string("op_1765"), val = int32(-2)]; bool var_1766_interleave_0 = const()[name = string("op_1766_interleave_0"), val = bool(false)]; tensor var_1766_cast_fp16 = concat(axis = var_1765, interleave = var_1766_interleave_0, values = (var_1763_cast_fp16, var_1761_cast_fp16_0))[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = var_1766_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1760_cast_fp16, y = var_1767_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor embed_23_cast_fp16 = transpose(perm = embed_23_perm_0, x = var_1749_cast_fp16)[name = string("transpose_56")]; tensor var_1770_cast_fp16 = mul(x = embed_23_cast_fp16, y = cos_cast_fp16)[name = string("op_1770_cast_fp16")]; tensor var_1771_split_sizes_0 = const()[name = string("op_1771_split_sizes_0"), val = tensor([32, 32])]; int32 var_1771_axis_0 = const()[name = string("op_1771_axis_0"), val = int32(-1)]; tensor var_1771_cast_fp16_0, tensor var_1771_cast_fp16_1 = split(axis = var_1771_axis_0, split_sizes = var_1771_split_sizes_0, x = embed_23_cast_fp16)[name = string("op_1771_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1773_cast_fp16 = mul(x = var_1771_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1773_cast_fp16")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool var_1776_interleave_0 = const()[name = string("op_1776_interleave_0"), val = bool(false)]; tensor var_1776_cast_fp16 = concat(axis = var_1775, interleave = var_1776_interleave_0, values = (var_1773_cast_fp16, var_1771_cast_fp16_0))[name = string("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = mul(x = var_1776_cast_fp16, y = sin_cast_fp16)[name = string("op_1777_cast_fp16")]; tensor key_states_23_cast_fp16 = add(x = var_1770_cast_fp16, y = var_1777_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_426, concat_44_values3_0))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_176_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_176")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_23_cast_fp16 = transpose(perm = value_states_23_perm_0, x = var_1756_cast_fp16)[name = string("transpose_55")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_23_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_177_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_177")]; tensor var_1820_begin_0 = const()[name = string("op_1820_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1820_end_0 = const()[name = string("op_1820_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1820_end_mask_0 = const()[name = string("op_1820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1820_cast_fp16 = slice_by_index(begin = var_1820_begin_0, end = var_1820_end_0, end_mask = var_1820_end_mask_0, x = coreml_update_state_58)[name = string("op_1820_cast_fp16")]; tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; int32 var_1823_axis_0 = const()[name = string("op_1823_axis_0"), val = int32(1)]; tensor var_1823_cast_fp16_0, tensor var_1823_cast_fp16_1 = split(axis = var_1823_axis_0, split_sizes = tile_10, x = var_1820_cast_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_59)[name = string("op_1830_cast_fp16")]; tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; int32 var_1833_axis_0 = const()[name = string("op_1833_axis_0"), val = int32(1)]; tensor var_1833_cast_fp16_0, tensor var_1833_cast_fp16_1 = split(axis = var_1833_axis_0, split_sizes = tile_11, x = var_1830_cast_fp16)[name = string("op_1833_cast_fp16")]; tensor var_1836_split_sizes_0 = const()[name = string("op_1836_split_sizes_0"), val = tensor([8, 8])]; int32 var_1836_axis_0 = const()[name = string("op_1836_axis_0"), val = int32(1)]; tensor var_1836_cast_fp16_0, tensor var_1836_cast_fp16_1 = split(axis = var_1836_axis_0, split_sizes = var_1836_split_sizes_0, x = query_states_23_cast_fp16)[name = string("op_1836_cast_fp16")]; bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1823_cast_fp16_0, y = var_1836_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; int32 var_1843 = const()[name = string("op_1843"), val = int32(2)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_1843, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool var_1849_transpose_x_1 = const()[name = string("op_1849_transpose_x_1"), val = bool(true)]; bool var_1849_transpose_y_1 = const()[name = string("op_1849_transpose_y_1"), val = bool(false)]; tensor var_1849_cast_fp16 = matmul(transpose_x = var_1849_transpose_x_1, transpose_y = var_1849_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1833_cast_fp16_0)[name = string("op_1849_cast_fp16")]; bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1823_cast_fp16_1, y = var_1836_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; int32 var_1855 = const()[name = string("op_1855"), val = int32(2)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_1855, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_95_cast_fp16, y = var_1833_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; int32 var_1863 = const()[name = string("op_1863"), val = int32(1)]; bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = concat(axis = var_1863, interleave = attn_output_33_interleave_0, values = (var_1849_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; tensor var_1867_perm_0 = const()[name = string("op_1867_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, 1024, 1, 1])]; tensor var_1867_cast_fp16 = transpose(perm = var_1867_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_54")]; tensor x_95_cast_fp16 = reshape(shape = var_1872, x = var_1867_cast_fp16)[name = string("x_95_cast_fp16")]; string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; tensor var_1879_to_fp16 = const()[name = string("op_1879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1879_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_1891 = const()[name = string("op_1891"), val = int32(1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; tensor x_99_cast_fp16 = concat(axis = var_1891, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1894_cast_fp16))[name = string("x_99_cast_fp16")]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1904_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1910_split_sizes_0 = const()[name = string("op_1910_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1910_axis_0 = const()[name = string("op_1910_axis_0"), val = int32(1)]; tensor var_1910_cast_fp16_0, tensor var_1910_cast_fp16_1 = split(axis = var_1910_axis_0, split_sizes = var_1910_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1910_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; tensor input_11_cast_fp16 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = var_1915_to_fp16, x = var_1910_cast_fp16_0)[name = string("input_11_cast_fp16")]; tensor var_1926_cast_fp16 = silu(x = input_11_cast_fp16)[name = string("op_1926_cast_fp16")]; string var_1931_pad_type_0 = const()[name = string("op_1931_pad_type_0"), val = string("valid")]; tensor var_1931_strides_0 = const()[name = string("op_1931_strides_0"), val = tensor([1, 1])]; tensor var_1931_pad_0 = const()[name = string("op_1931_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1931_dilations_0 = const()[name = string("op_1931_dilations_0"), val = tensor([1, 1])]; int32 var_1931_groups_0 = const()[name = string("op_1931_groups_0"), val = int32(1)]; tensor var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; tensor var_1931_cast_fp16 = conv(dilations = var_1931_dilations_0, groups = var_1931_groups_0, pad = var_1931_pad_0, pad_type = var_1931_pad_type_0, strides = var_1931_strides_0, weight = var_1914_to_fp16, x = var_1910_cast_fp16_0)[name = string("op_1931_cast_fp16")]; tensor x_105_cast_fp16 = mul(x = var_1926_cast_fp16, y = var_1931_cast_fp16)[name = string("x_105_cast_fp16")]; string hidden_states_35_pad_type_0 = const()[name = string("hidden_states_35_pad_type_0"), val = string("valid")]; tensor hidden_states_35_strides_0 = const()[name = string("hidden_states_35_strides_0"), val = tensor([1, 1])]; tensor hidden_states_35_pad_0 = const()[name = string("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_35_dilations_0 = const()[name = string("hidden_states_35_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_35_groups_0 = const()[name = string("hidden_states_35_groups_0"), val = int32(1)]; tensor var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; tensor hidden_states_35_cast_fp16 = conv(dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = var_1913_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1947_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1947_cast_fp16")]; bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; tensor x_109_cast_fp16 = concat(axis = var_1944, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1947_cast_fp16))[name = string("x_109_cast_fp16")]; tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; fp16 var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1957_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; tensor layer_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_layers_6_input_layernorm_weight_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_1963_split_sizes_0 = const()[name = string("op_1963_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1963_axis_0 = const()[name = string("op_1963_axis_0"), val = int32(1)]; tensor var_1963_cast_fp16_0, tensor var_1963_cast_fp16_1 = split(axis = var_1963_axis_0, split_sizes = var_1963_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1963_cast_fp16")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; tensor query_states_25_cast_fp16 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = var_1985_to_fp16, x = var_1963_cast_fp16_0)[name = string("query_states_25_cast_fp16")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor var_1996_to_fp16 = const()[name = string("op_1996_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189861696)))]; tensor key_states_25_cast_fp16 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = var_1996_to_fp16, x = var_1963_cast_fp16_0)[name = string("key_states_25_cast_fp16")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190123904)))]; tensor value_states_25_cast_fp16 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = var_2007_to_fp16, x = var_1963_cast_fp16_0)[name = string("value_states_25_cast_fp16")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 16, 64, 1])]; tensor embed_25_cast_fp16 = reshape(shape = var_2015, x = query_states_25_cast_fp16)[name = string("embed_25_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 2, 64, 1])]; tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = key_states_25_cast_fp16)[name = string("op_2020_cast_fp16")]; tensor embed_27_perm_0 = const()[name = string("embed_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2026 = const()[name = string("op_2026"), val = tensor([1, 2, 64, 1])]; tensor var_2027_cast_fp16 = reshape(shape = var_2026, x = value_states_25_cast_fp16)[name = string("op_2027_cast_fp16")]; tensor value_states_27_perm_0 = const()[name = string("value_states_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2031_cast_fp16 = mul(x = embed_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_split_sizes_0 = const()[name = string("op_2032_split_sizes_0"), val = tensor([32, 32])]; int32 var_2032_axis_0 = const()[name = string("op_2032_axis_0"), val = int32(-2)]; tensor var_2032_cast_fp16_0, tensor var_2032_cast_fp16_1 = split(axis = var_2032_axis_0, split_sizes = var_2032_split_sizes_0, x = embed_25_cast_fp16)[name = string("op_2032_cast_fp16")]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2034_cast_fp16 = mul(x = var_2032_cast_fp16_1, y = const_67_promoted_to_fp16)[name = string("op_2034_cast_fp16")]; int32 var_2036 = const()[name = string("op_2036"), val = int32(-2)]; bool var_2037_interleave_0 = const()[name = string("op_2037_interleave_0"), val = bool(false)]; tensor var_2037_cast_fp16 = concat(axis = var_2036, interleave = var_2037_interleave_0, values = (var_2034_cast_fp16, var_2032_cast_fp16_0))[name = string("op_2037_cast_fp16")]; tensor var_2038_cast_fp16 = mul(x = var_2037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2038_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor embed_27_cast_fp16 = transpose(perm = embed_27_perm_0, x = var_2020_cast_fp16)[name = string("transpose_53")]; tensor var_2041_cast_fp16 = mul(x = embed_27_cast_fp16, y = cos_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2042_split_sizes_0 = const()[name = string("op_2042_split_sizes_0"), val = tensor([32, 32])]; int32 var_2042_axis_0 = const()[name = string("op_2042_axis_0"), val = int32(-1)]; tensor var_2042_cast_fp16_0, tensor var_2042_cast_fp16_1 = split(axis = var_2042_axis_0, split_sizes = var_2042_split_sizes_0, x = embed_27_cast_fp16)[name = string("op_2042_cast_fp16")]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = var_2042_cast_fp16_1, y = const_68_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; bool var_2047_interleave_0 = const()[name = string("op_2047_interleave_0"), val = bool(false)]; tensor var_2047_cast_fp16 = concat(axis = var_2046, interleave = var_2047_interleave_0, values = (var_2044_cast_fp16, var_2042_cast_fp16_0))[name = string("op_2047_cast_fp16")]; tensor var_2048_cast_fp16 = mul(x = var_2047_cast_fp16, y = sin_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor key_states_27_cast_fp16 = add(x = var_2041_cast_fp16, y = var_2048_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([6])]; tensor expand_dims_62 = const()[name = string("expand_dims_62"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([7])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_61, expand_dims_62, position_id, concat_51_values3_0))[name = string("concat_51")]; tensor concat_52_values1_0 = const()[name = string("concat_52_values1_0"), val = tensor([0])]; tensor concat_52_values3_0 = const()[name = string("concat_52_values3_0"), val = tensor([0])]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (expand_dims_64, concat_52_values1_0, var_426, concat_52_values3_0))[name = string("concat_52")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = key_states_27_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_178_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_178")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27_cast_fp16 = transpose(perm = value_states_27_perm_0, x = var_2027_cast_fp16)[name = string("transpose_52")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = value_states_27_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_179_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_179")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = coreml_update_state_60)[name = string("op_2091_cast_fp16")]; tensor tile_12 = const()[name = string("tile_12"), val = tensor([1, 1])]; int32 var_2094_axis_0 = const()[name = string("op_2094_axis_0"), val = int32(1)]; tensor var_2094_cast_fp16_0, tensor var_2094_cast_fp16_1 = split(axis = var_2094_axis_0, split_sizes = tile_12, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = string("op_2101_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = string("op_2101_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2101_end_mask_0 = const()[name = string("op_2101_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = coreml_update_state_61)[name = string("op_2101_cast_fp16")]; tensor tile_13 = const()[name = string("tile_13"), val = tensor([1, 1])]; int32 var_2104_axis_0 = const()[name = string("op_2104_axis_0"), val = int32(1)]; tensor var_2104_cast_fp16_0, tensor var_2104_cast_fp16_1 = split(axis = var_2104_axis_0, split_sizes = tile_13, x = var_2101_cast_fp16)[name = string("op_2104_cast_fp16")]; tensor var_2107_split_sizes_0 = const()[name = string("op_2107_split_sizes_0"), val = tensor([8, 8])]; int32 var_2107_axis_0 = const()[name = string("op_2107_axis_0"), val = int32(1)]; tensor var_2107_cast_fp16_0, tensor var_2107_cast_fp16_1 = split(axis = var_2107_axis_0, split_sizes = var_2107_split_sizes_0, x = query_states_27_cast_fp16)[name = string("op_2107_cast_fp16")]; bool attn_weights_97_transpose_x_0 = const()[name = string("attn_weights_97_transpose_x_0"), val = bool(false)]; bool attn_weights_97_transpose_y_0 = const()[name = string("attn_weights_97_transpose_y_0"), val = bool(false)]; tensor attn_weights_97_cast_fp16 = matmul(transpose_x = attn_weights_97_transpose_x_0, transpose_y = attn_weights_97_transpose_y_0, x = var_2094_cast_fp16_0, y = var_2107_cast_fp16_0)[name = string("attn_weights_97_cast_fp16")]; fp16 _inversed_attn_weights_99_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_99_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_99_cast_fp16 = mul(x = attn_weights_97_cast_fp16, y = _inversed_attn_weights_99_y_0_to_fp16)[name = string("_inversed_attn_weights_99_cast_fp16")]; tensor attn_weights_101_cast_fp16 = add(x = _inversed_attn_weights_99_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; int32 var_2114 = const()[name = string("op_2114"), val = int32(2)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_2114, x = attn_weights_101_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool var_2120_transpose_x_1 = const()[name = string("op_2120_transpose_x_1"), val = bool(true)]; bool var_2120_transpose_y_1 = const()[name = string("op_2120_transpose_y_1"), val = bool(false)]; tensor var_2120_cast_fp16 = matmul(transpose_x = var_2120_transpose_x_1, transpose_y = var_2120_transpose_y_1, x = attn_weights_103_cast_fp16, y = var_2104_cast_fp16_0)[name = string("op_2120_cast_fp16")]; bool attn_weights_105_transpose_x_0 = const()[name = string("attn_weights_105_transpose_x_0"), val = bool(false)]; bool attn_weights_105_transpose_y_0 = const()[name = string("attn_weights_105_transpose_y_0"), val = bool(false)]; tensor attn_weights_105_cast_fp16 = matmul(transpose_x = attn_weights_105_transpose_x_0, transpose_y = attn_weights_105_transpose_y_0, x = var_2094_cast_fp16_1, y = var_2107_cast_fp16_1)[name = string("attn_weights_105_cast_fp16")]; fp16 _inversed_attn_weights_107_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_107_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_107_cast_fp16 = mul(x = attn_weights_105_cast_fp16, y = _inversed_attn_weights_107_y_0_to_fp16)[name = string("_inversed_attn_weights_107_cast_fp16")]; tensor attn_weights_109_cast_fp16 = add(x = _inversed_attn_weights_107_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_109_cast_fp16")]; int32 var_2126 = const()[name = string("op_2126"), val = int32(2)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_2126, x = attn_weights_109_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(true)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_111_cast_fp16, y = var_2104_cast_fp16_1)[name = string("attn_output_37_cast_fp16")]; int32 var_2134 = const()[name = string("op_2134"), val = int32(1)]; bool attn_output_39_interleave_0 = const()[name = string("attn_output_39_interleave_0"), val = bool(false)]; tensor attn_output_39_cast_fp16 = concat(axis = var_2134, interleave = attn_output_39_interleave_0, values = (var_2120_cast_fp16, attn_output_37_cast_fp16))[name = string("attn_output_39_cast_fp16")]; tensor var_2138_perm_0 = const()[name = string("op_2138_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1024, 1, 1])]; tensor var_2138_cast_fp16 = transpose(perm = var_2138_perm_0, x = attn_output_39_cast_fp16)[name = string("transpose_51")]; tensor x_113_cast_fp16 = reshape(shape = var_2143, x = var_2138_cast_fp16)[name = string("x_113_cast_fp16")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386112)))]; tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = var_2150_to_fp16, x = x_113_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor x_115_cast_fp16 = add(x = x_107_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("x_115_cast_fp16")]; int32 var_2162 = const()[name = string("op_2162"), val = int32(1)]; fp16 const_73_promoted_to_fp16 = const()[name = string("const_73_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2165_cast_fp16 = mul(x = x_115_cast_fp16, y = const_73_promoted_to_fp16)[name = string("op_2165_cast_fp16")]; bool x_117_interleave_0 = const()[name = string("x_117_interleave_0"), val = bool(false)]; tensor x_117_cast_fp16 = concat(axis = var_2162, interleave = x_117_interleave_0, values = (x_115_cast_fp16, var_2165_cast_fp16))[name = string("x_117_cast_fp16")]; tensor out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor([1])]; fp16 var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2175_to_fp16, x = x_117_cast_fp16)[name = string("out_79_cast_fp16")]; tensor layer_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192483328)))]; tensor out_81_cast_fp16 = mul(x = out_79_cast_fp16, y = layer_layers_6_post_attention_layernorm_weight_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_2181_split_sizes_0 = const()[name = string("op_2181_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2181_axis_0 = const()[name = string("op_2181_axis_0"), val = int32(1)]; tensor var_2181_cast_fp16_0, tensor var_2181_cast_fp16_1 = split(axis = var_2181_axis_0, split_sizes = var_2181_split_sizes_0, x = out_81_cast_fp16)[name = string("op_2181_cast_fp16")]; string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192487488)))]; tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = var_2186_to_fp16, x = var_2181_cast_fp16_0)[name = string("input_13_cast_fp16")]; tensor var_2197_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_2197_cast_fp16")]; string var_2202_pad_type_0 = const()[name = string("op_2202_pad_type_0"), val = string("valid")]; tensor var_2202_strides_0 = const()[name = string("op_2202_strides_0"), val = tensor([1, 1])]; tensor var_2202_pad_0 = const()[name = string("op_2202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2202_dilations_0 = const()[name = string("op_2202_dilations_0"), val = tensor([1, 1])]; int32 var_2202_groups_0 = const()[name = string("op_2202_groups_0"), val = int32(1)]; tensor var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200876160)))]; tensor var_2202_cast_fp16 = conv(dilations = var_2202_dilations_0, groups = var_2202_groups_0, pad = var_2202_pad_0, pad_type = var_2202_pad_type_0, strides = var_2202_strides_0, weight = var_2185_to_fp16, x = var_2181_cast_fp16_0)[name = string("op_2202_cast_fp16")]; tensor x_123_cast_fp16 = mul(x = var_2197_cast_fp16, y = var_2202_cast_fp16)[name = string("x_123_cast_fp16")]; string hidden_states_41_pad_type_0 = const()[name = string("hidden_states_41_pad_type_0"), val = string("valid")]; tensor hidden_states_41_strides_0 = const()[name = string("hidden_states_41_strides_0"), val = tensor([1, 1])]; tensor hidden_states_41_pad_0 = const()[name = string("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_41_dilations_0 = const()[name = string("hidden_states_41_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_41_groups_0 = const()[name = string("hidden_states_41_groups_0"), val = int32(1)]; tensor var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209264832)))]; tensor hidden_states_41_cast_fp16 = conv(dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = var_2184_to_fp16, x = x_123_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor x_125_cast_fp16 = add(x = x_115_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("x_125_cast_fp16")]; int32 var_2215 = const()[name = string("op_2215"), val = int32(1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_125_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool x_127_interleave_0 = const()[name = string("x_127_interleave_0"), val = bool(false)]; tensor x_127_cast_fp16 = concat(axis = var_2215, interleave = x_127_interleave_0, values = (x_125_cast_fp16, var_2218_cast_fp16))[name = string("x_127_cast_fp16")]; tensor out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor([1])]; fp16 var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2228_to_fp16, x = x_127_cast_fp16)[name = string("out_85_cast_fp16")]; tensor layer_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217653504)))]; tensor out_87_cast_fp16 = mul(x = out_85_cast_fp16, y = layer_layers_7_input_layernorm_weight_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_2234_split_sizes_0 = const()[name = string("op_2234_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2234_axis_0 = const()[name = string("op_2234_axis_0"), val = int32(1)]; tensor var_2234_cast_fp16_0, tensor var_2234_cast_fp16_1 = split(axis = var_2234_axis_0, split_sizes = var_2234_split_sizes_0, x = out_87_cast_fp16)[name = string("op_2234_cast_fp16")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217657664)))]; tensor query_states_29_cast_fp16 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = var_2256_to_fp16, x = var_2234_cast_fp16_0)[name = string("query_states_29_cast_fp16")]; string key_states_29_pad_type_0 = const()[name = string("key_states_29_pad_type_0"), val = string("valid")]; tensor key_states_29_strides_0 = const()[name = string("key_states_29_strides_0"), val = tensor([1, 1])]; tensor key_states_29_pad_0 = const()[name = string("key_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_29_dilations_0 = const()[name = string("key_states_29_dilations_0"), val = tensor([1, 1])]; int32 key_states_29_groups_0 = const()[name = string("key_states_29_groups_0"), val = int32(1)]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754880)))]; tensor key_states_29_cast_fp16 = conv(dilations = key_states_29_dilations_0, groups = key_states_29_groups_0, pad = key_states_29_pad_0, pad_type = key_states_29_pad_type_0, strides = key_states_29_strides_0, weight = var_2267_to_fp16, x = var_2234_cast_fp16_0)[name = string("key_states_29_cast_fp16")]; string value_states_29_pad_type_0 = const()[name = string("value_states_29_pad_type_0"), val = string("valid")]; tensor value_states_29_strides_0 = const()[name = string("value_states_29_strides_0"), val = tensor([1, 1])]; tensor value_states_29_pad_0 = const()[name = string("value_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_29_dilations_0 = const()[name = string("value_states_29_dilations_0"), val = tensor([1, 1])]; int32 value_states_29_groups_0 = const()[name = string("value_states_29_groups_0"), val = int32(1)]; tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220017088)))]; tensor value_states_29_cast_fp16 = conv(dilations = value_states_29_dilations_0, groups = value_states_29_groups_0, pad = value_states_29_pad_0, pad_type = value_states_29_pad_type_0, strides = value_states_29_strides_0, weight = var_2278_to_fp16, x = var_2234_cast_fp16_0)[name = string("value_states_29_cast_fp16")]; tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 16, 64, 1])]; tensor embed_29_cast_fp16 = reshape(shape = var_2286, x = query_states_29_cast_fp16)[name = string("embed_29_cast_fp16")]; tensor var_2290 = const()[name = string("op_2290"), val = tensor([1, 2, 64, 1])]; tensor var_2291_cast_fp16 = reshape(shape = var_2290, x = key_states_29_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor embed_31_perm_0 = const()[name = string("embed_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([1, 2, 64, 1])]; tensor var_2298_cast_fp16 = reshape(shape = var_2297, x = value_states_29_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor value_states_31_perm_0 = const()[name = string("value_states_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_cast_fp16 = mul(x = embed_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2302_cast_fp16")]; tensor var_2303_split_sizes_0 = const()[name = string("op_2303_split_sizes_0"), val = tensor([32, 32])]; int32 var_2303_axis_0 = const()[name = string("op_2303_axis_0"), val = int32(-2)]; tensor var_2303_cast_fp16_0, tensor var_2303_cast_fp16_1 = split(axis = var_2303_axis_0, split_sizes = var_2303_split_sizes_0, x = embed_29_cast_fp16)[name = string("op_2303_cast_fp16")]; fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2305_cast_fp16 = mul(x = var_2303_cast_fp16_1, y = const_77_promoted_to_fp16)[name = string("op_2305_cast_fp16")]; int32 var_2307 = const()[name = string("op_2307"), val = int32(-2)]; bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; tensor var_2308_cast_fp16 = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (var_2305_cast_fp16, var_2303_cast_fp16_0))[name = string("op_2308_cast_fp16")]; tensor var_2309_cast_fp16 = mul(x = var_2308_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_2302_cast_fp16, y = var_2309_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor embed_31_cast_fp16 = transpose(perm = embed_31_perm_0, x = var_2291_cast_fp16)[name = string("transpose_50")]; tensor var_2312_cast_fp16 = mul(x = embed_31_cast_fp16, y = cos_cast_fp16)[name = string("op_2312_cast_fp16")]; tensor var_2313_split_sizes_0 = const()[name = string("op_2313_split_sizes_0"), val = tensor([32, 32])]; int32 var_2313_axis_0 = const()[name = string("op_2313_axis_0"), val = int32(-1)]; tensor var_2313_cast_fp16_0, tensor var_2313_cast_fp16_1 = split(axis = var_2313_axis_0, split_sizes = var_2313_split_sizes_0, x = embed_31_cast_fp16)[name = string("op_2313_cast_fp16")]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2315_cast_fp16 = mul(x = var_2313_cast_fp16_1, y = const_78_promoted_to_fp16)[name = string("op_2315_cast_fp16")]; int32 var_2317 = const()[name = string("op_2317"), val = int32(-1)]; bool var_2318_interleave_0 = const()[name = string("op_2318_interleave_0"), val = bool(false)]; tensor var_2318_cast_fp16 = concat(axis = var_2317, interleave = var_2318_interleave_0, values = (var_2315_cast_fp16, var_2313_cast_fp16_0))[name = string("op_2318_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = var_2318_cast_fp16, y = sin_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor key_states_31_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2319_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor expand_dims_71 = const()[name = string("expand_dims_71"), val = tensor([7])]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_74 = const()[name = string("expand_dims_74"), val = tensor([8])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_71, expand_dims_72, position_id, concat_59_values3_0))[name = string("concat_59")]; tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_74, concat_60_values1_0, var_426, concat_60_values3_0))[name = string("concat_60")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = key_states_31_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_180_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_180")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_31_cast_fp16 = transpose(perm = value_states_31_perm_0, x = var_2298_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = value_states_31_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_181_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_181")]; tensor var_2362_begin_0 = const()[name = string("op_2362_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2362_end_0 = const()[name = string("op_2362_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2362_end_mask_0 = const()[name = string("op_2362_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = coreml_update_state_62)[name = string("op_2362_cast_fp16")]; tensor tile_14 = const()[name = string("tile_14"), val = tensor([1, 1])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = tile_14, x = var_2362_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2372_begin_0 = const()[name = string("op_2372_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2372_end_0 = const()[name = string("op_2372_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2372_end_mask_0 = const()[name = string("op_2372_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2372_cast_fp16 = slice_by_index(begin = var_2372_begin_0, end = var_2372_end_0, end_mask = var_2372_end_mask_0, x = coreml_update_state_63)[name = string("op_2372_cast_fp16")]; tensor tile_15 = const()[name = string("tile_15"), val = tensor([1, 1])]; int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(1)]; tensor var_2375_cast_fp16_0, tensor var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = tile_15, x = var_2372_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2378_split_sizes_0 = const()[name = string("op_2378_split_sizes_0"), val = tensor([8, 8])]; int32 var_2378_axis_0 = const()[name = string("op_2378_axis_0"), val = int32(1)]; tensor var_2378_cast_fp16_0, tensor var_2378_cast_fp16_1 = split(axis = var_2378_axis_0, split_sizes = var_2378_split_sizes_0, x = query_states_31_cast_fp16)[name = string("op_2378_cast_fp16")]; bool attn_weights_113_transpose_x_0 = const()[name = string("attn_weights_113_transpose_x_0"), val = bool(false)]; bool attn_weights_113_transpose_y_0 = const()[name = string("attn_weights_113_transpose_y_0"), val = bool(false)]; tensor attn_weights_113_cast_fp16 = matmul(transpose_x = attn_weights_113_transpose_x_0, transpose_y = attn_weights_113_transpose_y_0, x = var_2365_cast_fp16_0, y = var_2378_cast_fp16_0)[name = string("attn_weights_113_cast_fp16")]; fp16 _inversed_attn_weights_115_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_115_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_115_cast_fp16 = mul(x = attn_weights_113_cast_fp16, y = _inversed_attn_weights_115_y_0_to_fp16)[name = string("_inversed_attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = _inversed_attn_weights_115_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; int32 var_2385 = const()[name = string("op_2385"), val = int32(2)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_2385, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool var_2391_transpose_x_1 = const()[name = string("op_2391_transpose_x_1"), val = bool(true)]; bool var_2391_transpose_y_1 = const()[name = string("op_2391_transpose_y_1"), val = bool(false)]; tensor var_2391_cast_fp16 = matmul(transpose_x = var_2391_transpose_x_1, transpose_y = var_2391_transpose_y_1, x = attn_weights_119_cast_fp16, y = var_2375_cast_fp16_0)[name = string("op_2391_cast_fp16")]; bool attn_weights_121_transpose_x_0 = const()[name = string("attn_weights_121_transpose_x_0"), val = bool(false)]; bool attn_weights_121_transpose_y_0 = const()[name = string("attn_weights_121_transpose_y_0"), val = bool(false)]; tensor attn_weights_121_cast_fp16 = matmul(transpose_x = attn_weights_121_transpose_x_0, transpose_y = attn_weights_121_transpose_y_0, x = var_2365_cast_fp16_1, y = var_2378_cast_fp16_1)[name = string("attn_weights_121_cast_fp16")]; fp16 _inversed_attn_weights_123_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_123_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_123_cast_fp16 = mul(x = attn_weights_121_cast_fp16, y = _inversed_attn_weights_123_y_0_to_fp16)[name = string("_inversed_attn_weights_123_cast_fp16")]; tensor attn_weights_125_cast_fp16 = add(x = _inversed_attn_weights_123_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(2)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_2397, x = attn_weights_125_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_43_transpose_x_1 = const()[name = string("attn_output_43_transpose_x_1"), val = bool(true)]; bool attn_output_43_transpose_y_1 = const()[name = string("attn_output_43_transpose_y_1"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_1, transpose_y = attn_output_43_transpose_y_1, x = attn_weights_127_cast_fp16, y = var_2375_cast_fp16_1)[name = string("attn_output_43_cast_fp16")]; int32 var_2405 = const()[name = string("op_2405"), val = int32(1)]; bool attn_output_45_interleave_0 = const()[name = string("attn_output_45_interleave_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = concat(axis = var_2405, interleave = attn_output_45_interleave_0, values = (var_2391_cast_fp16, attn_output_43_cast_fp16))[name = string("attn_output_45_cast_fp16")]; tensor var_2409_perm_0 = const()[name = string("op_2409_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2414 = const()[name = string("op_2414"), val = tensor([1, 1024, 1, 1])]; tensor var_2409_cast_fp16 = transpose(perm = var_2409_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor x_131_cast_fp16 = reshape(shape = var_2414, x = var_2409_cast_fp16)[name = string("x_131_cast_fp16")]; string hidden_states_45_pad_type_0 = const()[name = string("hidden_states_45_pad_type_0"), val = string("valid")]; tensor hidden_states_45_strides_0 = const()[name = string("hidden_states_45_strides_0"), val = tensor([1, 1])]; tensor hidden_states_45_pad_0 = const()[name = string("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_45_dilations_0 = const()[name = string("hidden_states_45_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_45_groups_0 = const()[name = string("hidden_states_45_groups_0"), val = int32(1)]; tensor var_2421_to_fp16 = const()[name = string("op_2421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279296)))]; tensor hidden_states_45_cast_fp16 = conv(dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = var_2421_to_fp16, x = x_131_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor x_133_cast_fp16 = add(x = x_125_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("x_133_cast_fp16")]; int32 var_2433 = const()[name = string("op_2433"), val = int32(1)]; fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2436_cast_fp16 = mul(x = x_133_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; bool x_135_interleave_0 = const()[name = string("x_135_interleave_0"), val = bool(false)]; tensor x_135_cast_fp16 = concat(axis = var_2433, interleave = x_135_interleave_0, values = (x_133_cast_fp16, var_2436_cast_fp16))[name = string("x_135_cast_fp16")]; tensor out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor([1])]; fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2446_to_fp16, x = x_135_cast_fp16)[name = string("out_91_cast_fp16")]; tensor layer_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376512)))]; tensor out_93_cast_fp16 = mul(x = out_91_cast_fp16, y = layer_layers_7_post_attention_layernorm_weight_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_2452_split_sizes_0 = const()[name = string("op_2452_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2452_axis_0 = const()[name = string("op_2452_axis_0"), val = int32(1)]; tensor var_2452_cast_fp16_0, tensor var_2452_cast_fp16_1 = split(axis = var_2452_axis_0, split_sizes = var_2452_split_sizes_0, x = out_93_cast_fp16)[name = string("op_2452_cast_fp16")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222380672)))]; tensor input_15_cast_fp16 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = var_2457_to_fp16, x = var_2452_cast_fp16_0)[name = string("input_15_cast_fp16")]; tensor var_2468_cast_fp16 = silu(x = input_15_cast_fp16)[name = string("op_2468_cast_fp16")]; string var_2473_pad_type_0 = const()[name = string("op_2473_pad_type_0"), val = string("valid")]; tensor var_2473_strides_0 = const()[name = string("op_2473_strides_0"), val = tensor([1, 1])]; tensor var_2473_pad_0 = const()[name = string("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2473_dilations_0 = const()[name = string("op_2473_dilations_0"), val = tensor([1, 1])]; int32 var_2473_groups_0 = const()[name = string("op_2473_groups_0"), val = int32(1)]; tensor var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230769344)))]; tensor var_2473_cast_fp16 = conv(dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = var_2456_to_fp16, x = var_2452_cast_fp16_0)[name = string("op_2473_cast_fp16")]; tensor x_141_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2473_cast_fp16)[name = string("x_141_cast_fp16")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239158016)))]; tensor hidden_states_47_cast_fp16 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = var_2455_to_fp16, x = x_141_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor x_143_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("x_143_cast_fp16")]; int32 var_2486 = const()[name = string("op_2486"), val = int32(1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2489_cast_fp16 = mul(x = x_143_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; bool x_145_interleave_0 = const()[name = string("x_145_interleave_0"), val = bool(false)]; tensor x_145_cast_fp16 = concat(axis = var_2486, interleave = x_145_interleave_0, values = (x_143_cast_fp16, var_2489_cast_fp16))[name = string("x_145_cast_fp16")]; tensor out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor([1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_2499_to_fp16, x = x_145_cast_fp16)[name = string("out_97_cast_fp16")]; tensor layer_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247546688)))]; tensor out_99_cast_fp16 = mul(x = out_97_cast_fp16, y = layer_layers_8_input_layernorm_weight_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_2505_split_sizes_0 = const()[name = string("op_2505_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2505_axis_0 = const()[name = string("op_2505_axis_0"), val = int32(1)]; tensor var_2505_cast_fp16_0, tensor var_2505_cast_fp16_1 = split(axis = var_2505_axis_0, split_sizes = var_2505_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2505_cast_fp16")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247550848)))]; tensor query_states_33_cast_fp16 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = var_2527_to_fp16, x = var_2505_cast_fp16_0)[name = string("query_states_33_cast_fp16")]; string key_states_33_pad_type_0 = const()[name = string("key_states_33_pad_type_0"), val = string("valid")]; tensor key_states_33_strides_0 = const()[name = string("key_states_33_strides_0"), val = tensor([1, 1])]; tensor key_states_33_pad_0 = const()[name = string("key_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_33_dilations_0 = const()[name = string("key_states_33_dilations_0"), val = tensor([1, 1])]; int32 key_states_33_groups_0 = const()[name = string("key_states_33_groups_0"), val = int32(1)]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249648064)))]; tensor key_states_33_cast_fp16 = conv(dilations = key_states_33_dilations_0, groups = key_states_33_groups_0, pad = key_states_33_pad_0, pad_type = key_states_33_pad_type_0, strides = key_states_33_strides_0, weight = var_2538_to_fp16, x = var_2505_cast_fp16_0)[name = string("key_states_33_cast_fp16")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249910272)))]; tensor value_states_33_cast_fp16 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = var_2549_to_fp16, x = var_2505_cast_fp16_0)[name = string("value_states_33_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 16, 64, 1])]; tensor embed_33_cast_fp16 = reshape(shape = var_2557, x = query_states_33_cast_fp16)[name = string("embed_33_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 2, 64, 1])]; tensor var_2562_cast_fp16 = reshape(shape = var_2561, x = key_states_33_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor embed_35_perm_0 = const()[name = string("embed_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([1, 2, 64, 1])]; tensor var_2569_cast_fp16 = reshape(shape = var_2568, x = value_states_33_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor value_states_35_perm_0 = const()[name = string("value_states_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2573_cast_fp16 = mul(x = embed_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor var_2574_split_sizes_0 = const()[name = string("op_2574_split_sizes_0"), val = tensor([32, 32])]; int32 var_2574_axis_0 = const()[name = string("op_2574_axis_0"), val = int32(-2)]; tensor var_2574_cast_fp16_0, tensor var_2574_cast_fp16_1 = split(axis = var_2574_axis_0, split_sizes = var_2574_split_sizes_0, x = embed_33_cast_fp16)[name = string("op_2574_cast_fp16")]; fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2576_cast_fp16 = mul(x = var_2574_cast_fp16_1, y = const_87_promoted_to_fp16)[name = string("op_2576_cast_fp16")]; int32 var_2578 = const()[name = string("op_2578"), val = int32(-2)]; bool var_2579_interleave_0 = const()[name = string("op_2579_interleave_0"), val = bool(false)]; tensor var_2579_cast_fp16 = concat(axis = var_2578, interleave = var_2579_interleave_0, values = (var_2576_cast_fp16, var_2574_cast_fp16_0))[name = string("op_2579_cast_fp16")]; tensor var_2580_cast_fp16 = mul(x = var_2579_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2580_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor embed_35_cast_fp16 = transpose(perm = embed_35_perm_0, x = var_2562_cast_fp16)[name = string("transpose_47")]; tensor var_2583_cast_fp16 = mul(x = embed_35_cast_fp16, y = cos_cast_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2584_split_sizes_0 = const()[name = string("op_2584_split_sizes_0"), val = tensor([32, 32])]; int32 var_2584_axis_0 = const()[name = string("op_2584_axis_0"), val = int32(-1)]; tensor var_2584_cast_fp16_0, tensor var_2584_cast_fp16_1 = split(axis = var_2584_axis_0, split_sizes = var_2584_split_sizes_0, x = embed_35_cast_fp16)[name = string("op_2584_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2586_cast_fp16 = mul(x = var_2584_cast_fp16_1, y = const_88_promoted_to_fp16)[name = string("op_2586_cast_fp16")]; int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; tensor var_2589_cast_fp16 = concat(axis = var_2588, interleave = var_2589_interleave_0, values = (var_2586_cast_fp16, var_2584_cast_fp16_0))[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_cast_fp16)[name = string("op_2590_cast_fp16")]; tensor key_states_35_cast_fp16 = add(x = var_2583_cast_fp16, y = var_2590_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([8])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([9])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_81, expand_dims_82, position_id, concat_67_values3_0))[name = string("concat_67")]; tensor concat_68_values1_0 = const()[name = string("concat_68_values1_0"), val = tensor([0])]; tensor concat_68_values3_0 = const()[name = string("concat_68_values3_0"), val = tensor([0])]; int32 concat_68_axis_0 = const()[name = string("concat_68_axis_0"), val = int32(0)]; bool concat_68_interleave_0 = const()[name = string("concat_68_interleave_0"), val = bool(false)]; tensor concat_68 = concat(axis = concat_68_axis_0, interleave = concat_68_interleave_0, values = (expand_dims_84, concat_68_values1_0, var_426, concat_68_values3_0))[name = string("concat_68")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = key_states_35_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_182_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_182")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35_cast_fp16 = transpose(perm = value_states_35_perm_0, x = var_2569_cast_fp16)[name = string("transpose_46")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = value_states_35_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_183_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_183")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_64)[name = string("op_2633_cast_fp16")]; tensor tile_16 = const()[name = string("tile_16"), val = tensor([1, 1])]; int32 var_2636_axis_0 = const()[name = string("op_2636_axis_0"), val = int32(1)]; tensor var_2636_cast_fp16_0, tensor var_2636_cast_fp16_1 = split(axis = var_2636_axis_0, split_sizes = tile_16, x = var_2633_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor var_2643_begin_0 = const()[name = string("op_2643_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2643_end_0 = const()[name = string("op_2643_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2643_end_mask_0 = const()[name = string("op_2643_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = coreml_update_state_65)[name = string("op_2643_cast_fp16")]; tensor tile_17 = const()[name = string("tile_17"), val = tensor([1, 1])]; int32 var_2646_axis_0 = const()[name = string("op_2646_axis_0"), val = int32(1)]; tensor var_2646_cast_fp16_0, tensor var_2646_cast_fp16_1 = split(axis = var_2646_axis_0, split_sizes = tile_17, x = var_2643_cast_fp16)[name = string("op_2646_cast_fp16")]; tensor var_2649_split_sizes_0 = const()[name = string("op_2649_split_sizes_0"), val = tensor([8, 8])]; int32 var_2649_axis_0 = const()[name = string("op_2649_axis_0"), val = int32(1)]; tensor var_2649_cast_fp16_0, tensor var_2649_cast_fp16_1 = split(axis = var_2649_axis_0, split_sizes = var_2649_split_sizes_0, x = query_states_35_cast_fp16)[name = string("op_2649_cast_fp16")]; bool attn_weights_129_transpose_x_0 = const()[name = string("attn_weights_129_transpose_x_0"), val = bool(false)]; bool attn_weights_129_transpose_y_0 = const()[name = string("attn_weights_129_transpose_y_0"), val = bool(false)]; tensor attn_weights_129_cast_fp16 = matmul(transpose_x = attn_weights_129_transpose_x_0, transpose_y = attn_weights_129_transpose_y_0, x = var_2636_cast_fp16_0, y = var_2649_cast_fp16_0)[name = string("attn_weights_129_cast_fp16")]; fp16 _inversed_attn_weights_131_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_131_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_131_cast_fp16 = mul(x = attn_weights_129_cast_fp16, y = _inversed_attn_weights_131_y_0_to_fp16)[name = string("_inversed_attn_weights_131_cast_fp16")]; tensor attn_weights_133_cast_fp16 = add(x = _inversed_attn_weights_131_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_133_cast_fp16")]; int32 var_2656 = const()[name = string("op_2656"), val = int32(2)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_2656, x = attn_weights_133_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool var_2662_transpose_x_1 = const()[name = string("op_2662_transpose_x_1"), val = bool(true)]; bool var_2662_transpose_y_1 = const()[name = string("op_2662_transpose_y_1"), val = bool(false)]; tensor var_2662_cast_fp16 = matmul(transpose_x = var_2662_transpose_x_1, transpose_y = var_2662_transpose_y_1, x = attn_weights_135_cast_fp16, y = var_2646_cast_fp16_0)[name = string("op_2662_cast_fp16")]; bool attn_weights_137_transpose_x_0 = const()[name = string("attn_weights_137_transpose_x_0"), val = bool(false)]; bool attn_weights_137_transpose_y_0 = const()[name = string("attn_weights_137_transpose_y_0"), val = bool(false)]; tensor attn_weights_137_cast_fp16 = matmul(transpose_x = attn_weights_137_transpose_x_0, transpose_y = attn_weights_137_transpose_y_0, x = var_2636_cast_fp16_1, y = var_2649_cast_fp16_1)[name = string("attn_weights_137_cast_fp16")]; fp16 _inversed_attn_weights_139_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_139_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_139_cast_fp16 = mul(x = attn_weights_137_cast_fp16, y = _inversed_attn_weights_139_y_0_to_fp16)[name = string("_inversed_attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = _inversed_attn_weights_139_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; int32 var_2668 = const()[name = string("op_2668"), val = int32(2)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_2668, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(true)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_143_cast_fp16, y = var_2646_cast_fp16_1)[name = string("attn_output_49_cast_fp16")]; int32 var_2676 = const()[name = string("op_2676"), val = int32(1)]; bool attn_output_51_interleave_0 = const()[name = string("attn_output_51_interleave_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = concat(axis = var_2676, interleave = attn_output_51_interleave_0, values = (var_2662_cast_fp16, attn_output_49_cast_fp16))[name = string("attn_output_51_cast_fp16")]; tensor var_2680_perm_0 = const()[name = string("op_2680_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 1024, 1, 1])]; tensor var_2680_cast_fp16 = transpose(perm = var_2680_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_45")]; tensor x_149_cast_fp16 = reshape(shape = var_2685, x = var_2680_cast_fp16)[name = string("x_149_cast_fp16")]; string hidden_states_51_pad_type_0 = const()[name = string("hidden_states_51_pad_type_0"), val = string("valid")]; tensor hidden_states_51_strides_0 = const()[name = string("hidden_states_51_strides_0"), val = tensor([1, 1])]; tensor hidden_states_51_pad_0 = const()[name = string("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_51_dilations_0 = const()[name = string("hidden_states_51_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_51_groups_0 = const()[name = string("hidden_states_51_groups_0"), val = int32(1)]; tensor var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250172480)))]; tensor hidden_states_51_cast_fp16 = conv(dilations = hidden_states_51_dilations_0, groups = hidden_states_51_groups_0, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = hidden_states_51_strides_0, weight = var_2692_to_fp16, x = x_149_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor x_151_cast_fp16 = add(x = x_143_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("x_151_cast_fp16")]; int32 var_2704 = const()[name = string("op_2704"), val = int32(1)]; fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2707_cast_fp16 = mul(x = x_151_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_2707_cast_fp16")]; bool x_153_interleave_0 = const()[name = string("x_153_interleave_0"), val = bool(false)]; tensor x_153_cast_fp16 = concat(axis = var_2704, interleave = x_153_interleave_0, values = (x_151_cast_fp16, var_2707_cast_fp16))[name = string("x_153_cast_fp16")]; tensor out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor([1])]; fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_2717_to_fp16, x = x_153_cast_fp16)[name = string("out_103_cast_fp16")]; tensor layer_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252269696)))]; tensor out_105_cast_fp16 = mul(x = out_103_cast_fp16, y = layer_layers_8_post_attention_layernorm_weight_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2723_split_sizes_0 = const()[name = string("op_2723_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2723_axis_0 = const()[name = string("op_2723_axis_0"), val = int32(1)]; tensor var_2723_cast_fp16_0, tensor var_2723_cast_fp16_1 = split(axis = var_2723_axis_0, split_sizes = var_2723_split_sizes_0, x = out_105_cast_fp16)[name = string("op_2723_cast_fp16")]; string input_17_pad_type_0 = const()[name = string("input_17_pad_type_0"), val = string("valid")]; tensor input_17_strides_0 = const()[name = string("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = string("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = string("input_17_dilations_0"), val = tensor([1, 1])]; int32 input_17_groups_0 = const()[name = string("input_17_groups_0"), val = int32(1)]; tensor var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252273856)))]; tensor input_17_cast_fp16 = conv(dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = var_2728_to_fp16, x = var_2723_cast_fp16_0)[name = string("input_17_cast_fp16")]; tensor var_2739_cast_fp16 = silu(x = input_17_cast_fp16)[name = string("op_2739_cast_fp16")]; string var_2744_pad_type_0 = const()[name = string("op_2744_pad_type_0"), val = string("valid")]; tensor var_2744_strides_0 = const()[name = string("op_2744_strides_0"), val = tensor([1, 1])]; tensor var_2744_pad_0 = const()[name = string("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2744_dilations_0 = const()[name = string("op_2744_dilations_0"), val = tensor([1, 1])]; int32 var_2744_groups_0 = const()[name = string("op_2744_groups_0"), val = int32(1)]; tensor var_2727_to_fp16 = const()[name = string("op_2727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260662528)))]; tensor var_2744_cast_fp16 = conv(dilations = var_2744_dilations_0, groups = var_2744_groups_0, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2744_strides_0, weight = var_2727_to_fp16, x = var_2723_cast_fp16_0)[name = string("op_2744_cast_fp16")]; tensor x_159_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2744_cast_fp16)[name = string("x_159_cast_fp16")]; string hidden_states_53_pad_type_0 = const()[name = string("hidden_states_53_pad_type_0"), val = string("valid")]; tensor hidden_states_53_strides_0 = const()[name = string("hidden_states_53_strides_0"), val = tensor([1, 1])]; tensor hidden_states_53_pad_0 = const()[name = string("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_53_dilations_0 = const()[name = string("hidden_states_53_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_53_groups_0 = const()[name = string("hidden_states_53_groups_0"), val = int32(1)]; tensor var_2726_to_fp16 = const()[name = string("op_2726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269051200)))]; tensor hidden_states_53_cast_fp16 = conv(dilations = hidden_states_53_dilations_0, groups = hidden_states_53_groups_0, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = hidden_states_53_strides_0, weight = var_2726_to_fp16, x = x_159_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_151_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_2757 = const()[name = string("op_2757"), val = int32(1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2760_cast_fp16 = mul(x = x_161_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_2760_cast_fp16")]; bool x_163_interleave_0 = const()[name = string("x_163_interleave_0"), val = bool(false)]; tensor x_163_cast_fp16 = concat(axis = var_2757, interleave = x_163_interleave_0, values = (x_161_cast_fp16, var_2760_cast_fp16))[name = string("x_163_cast_fp16")]; tensor out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor([1])]; fp16 var_2770_to_fp16 = const()[name = string("op_2770_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_2770_to_fp16, x = x_163_cast_fp16)[name = string("out_109_cast_fp16")]; tensor layer_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277439872)))]; tensor out_111_cast_fp16 = mul(x = out_109_cast_fp16, y = layer_layers_9_input_layernorm_weight_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_2776_split_sizes_0 = const()[name = string("op_2776_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2776_axis_0 = const()[name = string("op_2776_axis_0"), val = int32(1)]; tensor var_2776_cast_fp16_0, tensor var_2776_cast_fp16_1 = split(axis = var_2776_axis_0, split_sizes = var_2776_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2776_cast_fp16")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277444032)))]; tensor query_states_37_cast_fp16 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = var_2798_to_fp16, x = var_2776_cast_fp16_0)[name = string("query_states_37_cast_fp16")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279541248)))]; tensor key_states_37_cast_fp16 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = var_2809_to_fp16, x = var_2776_cast_fp16_0)[name = string("key_states_37_cast_fp16")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor var_2820_to_fp16 = const()[name = string("op_2820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279803456)))]; tensor value_states_37_cast_fp16 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = var_2820_to_fp16, x = var_2776_cast_fp16_0)[name = string("value_states_37_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 16, 64, 1])]; tensor embed_37_cast_fp16 = reshape(shape = var_2828, x = query_states_37_cast_fp16)[name = string("embed_37_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 64, 1])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = key_states_37_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor embed_39_perm_0 = const()[name = string("embed_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([1, 2, 64, 1])]; tensor var_2840_cast_fp16 = reshape(shape = var_2839, x = value_states_37_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor value_states_39_perm_0 = const()[name = string("value_states_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2844_cast_fp16 = mul(x = embed_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor var_2845_split_sizes_0 = const()[name = string("op_2845_split_sizes_0"), val = tensor([32, 32])]; int32 var_2845_axis_0 = const()[name = string("op_2845_axis_0"), val = int32(-2)]; tensor var_2845_cast_fp16_0, tensor var_2845_cast_fp16_1 = split(axis = var_2845_axis_0, split_sizes = var_2845_split_sizes_0, x = embed_37_cast_fp16)[name = string("op_2845_cast_fp16")]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = var_2845_cast_fp16_1, y = const_97_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; int32 var_2849 = const()[name = string("op_2849"), val = int32(-2)]; bool var_2850_interleave_0 = const()[name = string("op_2850_interleave_0"), val = bool(false)]; tensor var_2850_cast_fp16 = concat(axis = var_2849, interleave = var_2850_interleave_0, values = (var_2847_cast_fp16, var_2845_cast_fp16_0))[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = mul(x = var_2850_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2851_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor embed_39_cast_fp16 = transpose(perm = embed_39_perm_0, x = var_2833_cast_fp16)[name = string("transpose_44")]; tensor var_2854_cast_fp16 = mul(x = embed_39_cast_fp16, y = cos_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2855_split_sizes_0 = const()[name = string("op_2855_split_sizes_0"), val = tensor([32, 32])]; int32 var_2855_axis_0 = const()[name = string("op_2855_axis_0"), val = int32(-1)]; tensor var_2855_cast_fp16_0, tensor var_2855_cast_fp16_1 = split(axis = var_2855_axis_0, split_sizes = var_2855_split_sizes_0, x = embed_39_cast_fp16)[name = string("op_2855_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2857_cast_fp16 = mul(x = var_2855_cast_fp16_1, y = const_98_promoted_to_fp16)[name = string("op_2857_cast_fp16")]; int32 var_2859 = const()[name = string("op_2859"), val = int32(-1)]; bool var_2860_interleave_0 = const()[name = string("op_2860_interleave_0"), val = bool(false)]; tensor var_2860_cast_fp16 = concat(axis = var_2859, interleave = var_2860_interleave_0, values = (var_2857_cast_fp16, var_2855_cast_fp16_0))[name = string("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = mul(x = var_2860_cast_fp16, y = sin_cast_fp16)[name = string("op_2861_cast_fp16")]; tensor key_states_39_cast_fp16 = add(x = var_2854_cast_fp16, y = var_2861_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([9])]; tensor expand_dims_92 = const()[name = string("expand_dims_92"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([10])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_91, expand_dims_92, position_id, concat_75_values3_0))[name = string("concat_75")]; tensor concat_76_values1_0 = const()[name = string("concat_76_values1_0"), val = tensor([0])]; tensor concat_76_values3_0 = const()[name = string("concat_76_values3_0"), val = tensor([0])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_94, concat_76_values1_0, var_426, concat_76_values3_0))[name = string("concat_76")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = key_states_39_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_184_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_184")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39_cast_fp16 = transpose(perm = value_states_39_perm_0, x = var_2840_cast_fp16)[name = string("transpose_43")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = value_states_39_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_185_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_185")]; tensor var_2904_begin_0 = const()[name = string("op_2904_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = string("op_2904_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2904_end_mask_0 = const()[name = string("op_2904_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = coreml_update_state_66)[name = string("op_2904_cast_fp16")]; tensor tile_18 = const()[name = string("tile_18"), val = tensor([1, 1])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = tile_18, x = var_2904_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2914_begin_0 = const()[name = string("op_2914_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = string("op_2914_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2914_end_mask_0 = const()[name = string("op_2914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = coreml_update_state_67)[name = string("op_2914_cast_fp16")]; tensor tile_19 = const()[name = string("tile_19"), val = tensor([1, 1])]; int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(1)]; tensor var_2917_cast_fp16_0, tensor var_2917_cast_fp16_1 = split(axis = var_2917_axis_0, split_sizes = tile_19, x = var_2914_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor var_2920_split_sizes_0 = const()[name = string("op_2920_split_sizes_0"), val = tensor([8, 8])]; int32 var_2920_axis_0 = const()[name = string("op_2920_axis_0"), val = int32(1)]; tensor var_2920_cast_fp16_0, tensor var_2920_cast_fp16_1 = split(axis = var_2920_axis_0, split_sizes = var_2920_split_sizes_0, x = query_states_39_cast_fp16)[name = string("op_2920_cast_fp16")]; bool attn_weights_145_transpose_x_0 = const()[name = string("attn_weights_145_transpose_x_0"), val = bool(false)]; bool attn_weights_145_transpose_y_0 = const()[name = string("attn_weights_145_transpose_y_0"), val = bool(false)]; tensor attn_weights_145_cast_fp16 = matmul(transpose_x = attn_weights_145_transpose_x_0, transpose_y = attn_weights_145_transpose_y_0, x = var_2907_cast_fp16_0, y = var_2920_cast_fp16_0)[name = string("attn_weights_145_cast_fp16")]; fp16 _inversed_attn_weights_147_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_147_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_147_cast_fp16 = mul(x = attn_weights_145_cast_fp16, y = _inversed_attn_weights_147_y_0_to_fp16)[name = string("_inversed_attn_weights_147_cast_fp16")]; tensor attn_weights_149_cast_fp16 = add(x = _inversed_attn_weights_147_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; int32 var_2927 = const()[name = string("op_2927"), val = int32(2)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_2927, x = attn_weights_149_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool var_2933_transpose_x_1 = const()[name = string("op_2933_transpose_x_1"), val = bool(true)]; bool var_2933_transpose_y_1 = const()[name = string("op_2933_transpose_y_1"), val = bool(false)]; tensor var_2933_cast_fp16 = matmul(transpose_x = var_2933_transpose_x_1, transpose_y = var_2933_transpose_y_1, x = attn_weights_151_cast_fp16, y = var_2917_cast_fp16_0)[name = string("op_2933_cast_fp16")]; bool attn_weights_153_transpose_x_0 = const()[name = string("attn_weights_153_transpose_x_0"), val = bool(false)]; bool attn_weights_153_transpose_y_0 = const()[name = string("attn_weights_153_transpose_y_0"), val = bool(false)]; tensor attn_weights_153_cast_fp16 = matmul(transpose_x = attn_weights_153_transpose_x_0, transpose_y = attn_weights_153_transpose_y_0, x = var_2907_cast_fp16_1, y = var_2920_cast_fp16_1)[name = string("attn_weights_153_cast_fp16")]; fp16 _inversed_attn_weights_155_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_155_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_155_cast_fp16 = mul(x = attn_weights_153_cast_fp16, y = _inversed_attn_weights_155_y_0_to_fp16)[name = string("_inversed_attn_weights_155_cast_fp16")]; tensor attn_weights_157_cast_fp16 = add(x = _inversed_attn_weights_155_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_157_cast_fp16")]; int32 var_2939 = const()[name = string("op_2939"), val = int32(2)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_2939, x = attn_weights_157_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_55_transpose_x_1 = const()[name = string("attn_output_55_transpose_x_1"), val = bool(true)]; bool attn_output_55_transpose_y_1 = const()[name = string("attn_output_55_transpose_y_1"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_1, transpose_y = attn_output_55_transpose_y_1, x = attn_weights_159_cast_fp16, y = var_2917_cast_fp16_1)[name = string("attn_output_55_cast_fp16")]; int32 var_2947 = const()[name = string("op_2947"), val = int32(1)]; bool attn_output_57_interleave_0 = const()[name = string("attn_output_57_interleave_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = concat(axis = var_2947, interleave = attn_output_57_interleave_0, values = (var_2933_cast_fp16, attn_output_55_cast_fp16))[name = string("attn_output_57_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1024, 1, 1])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; tensor x_167_cast_fp16 = reshape(shape = var_2956, x = var_2951_cast_fp16)[name = string("x_167_cast_fp16")]; string hidden_states_57_pad_type_0 = const()[name = string("hidden_states_57_pad_type_0"), val = string("valid")]; tensor hidden_states_57_strides_0 = const()[name = string("hidden_states_57_strides_0"), val = tensor([1, 1])]; tensor hidden_states_57_pad_0 = const()[name = string("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_57_dilations_0 = const()[name = string("hidden_states_57_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_57_groups_0 = const()[name = string("hidden_states_57_groups_0"), val = int32(1)]; tensor var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280065664)))]; tensor hidden_states_57_cast_fp16 = conv(dilations = hidden_states_57_dilations_0, groups = hidden_states_57_groups_0, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = hidden_states_57_strides_0, weight = var_2963_to_fp16, x = x_167_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor x_169_cast_fp16 = add(x = x_161_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("x_169_cast_fp16")]; int32 var_2975 = const()[name = string("op_2975"), val = int32(1)]; fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2978_cast_fp16 = mul(x = x_169_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; bool x_171_interleave_0 = const()[name = string("x_171_interleave_0"), val = bool(false)]; tensor x_171_cast_fp16 = concat(axis = var_2975, interleave = x_171_interleave_0, values = (x_169_cast_fp16, var_2978_cast_fp16))[name = string("x_171_cast_fp16")]; tensor out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor([1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_2988_to_fp16, x = x_171_cast_fp16)[name = string("out_115_cast_fp16")]; tensor layer_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282162880)))]; tensor out_117_cast_fp16 = mul(x = out_115_cast_fp16, y = layer_layers_9_post_attention_layernorm_weight_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2994_split_sizes_0 = const()[name = string("op_2994_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2994_axis_0 = const()[name = string("op_2994_axis_0"), val = int32(1)]; tensor var_2994_cast_fp16_0, tensor var_2994_cast_fp16_1 = split(axis = var_2994_axis_0, split_sizes = var_2994_split_sizes_0, x = out_117_cast_fp16)[name = string("op_2994_cast_fp16")]; string input_19_pad_type_0 = const()[name = string("input_19_pad_type_0"), val = string("valid")]; tensor input_19_strides_0 = const()[name = string("input_19_strides_0"), val = tensor([1, 1])]; tensor input_19_pad_0 = const()[name = string("input_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_19_dilations_0 = const()[name = string("input_19_dilations_0"), val = tensor([1, 1])]; int32 input_19_groups_0 = const()[name = string("input_19_groups_0"), val = int32(1)]; tensor var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282167040)))]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = var_2999_to_fp16, x = var_2994_cast_fp16_0)[name = string("input_19_cast_fp16")]; tensor var_3010_cast_fp16 = silu(x = input_19_cast_fp16)[name = string("op_3010_cast_fp16")]; string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1, 1])]; tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1, 1])]; int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; tensor var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290555712)))]; tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = var_2998_to_fp16, x = var_2994_cast_fp16_0)[name = string("op_3015_cast_fp16")]; tensor x_177_cast_fp16 = mul(x = var_3010_cast_fp16, y = var_3015_cast_fp16)[name = string("x_177_cast_fp16")]; string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; tensor var_2997_to_fp16 = const()[name = string("op_2997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944384)))]; tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = var_2997_to_fp16, x = x_177_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor x_179_cast_fp16 = add(x = x_169_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("x_179_cast_fp16")]; int32 var_3028 = const()[name = string("op_3028"), val = int32(1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3031_cast_fp16 = mul(x = x_179_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3031_cast_fp16")]; bool x_181_interleave_0 = const()[name = string("x_181_interleave_0"), val = bool(false)]; tensor x_181_cast_fp16 = concat(axis = var_3028, interleave = x_181_interleave_0, values = (x_179_cast_fp16, var_3031_cast_fp16))[name = string("x_181_cast_fp16")]; tensor out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor([1])]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_3041_to_fp16, x = x_181_cast_fp16)[name = string("out_121_cast_fp16")]; tensor layer_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307333056)))]; tensor out_123_cast_fp16 = mul(x = out_121_cast_fp16, y = layer_layers_10_input_layernorm_weight_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_3047_split_sizes_0 = const()[name = string("op_3047_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3047_axis_0 = const()[name = string("op_3047_axis_0"), val = int32(1)]; tensor var_3047_cast_fp16_0, tensor var_3047_cast_fp16_1 = split(axis = var_3047_axis_0, split_sizes = var_3047_split_sizes_0, x = out_123_cast_fp16)[name = string("op_3047_cast_fp16")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307337216)))]; tensor query_states_41_cast_fp16 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = var_3069_to_fp16, x = var_3047_cast_fp16_0)[name = string("query_states_41_cast_fp16")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309434432)))]; tensor key_states_41_cast_fp16 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = var_3080_to_fp16, x = var_3047_cast_fp16_0)[name = string("key_states_41_cast_fp16")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309696640)))]; tensor value_states_41_cast_fp16 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = var_3091_to_fp16, x = var_3047_cast_fp16_0)[name = string("value_states_41_cast_fp16")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([1, 16, 64, 1])]; tensor embed_41_cast_fp16 = reshape(shape = var_3099, x = query_states_41_cast_fp16)[name = string("embed_41_cast_fp16")]; tensor var_3103 = const()[name = string("op_3103"), val = tensor([1, 2, 64, 1])]; tensor var_3104_cast_fp16 = reshape(shape = var_3103, x = key_states_41_cast_fp16)[name = string("op_3104_cast_fp16")]; tensor embed_43_perm_0 = const()[name = string("embed_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 2, 64, 1])]; tensor var_3111_cast_fp16 = reshape(shape = var_3110, x = value_states_41_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor value_states_43_perm_0 = const()[name = string("value_states_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3115_cast_fp16 = mul(x = embed_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3115_cast_fp16")]; tensor var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor([32, 32])]; int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-2)]; tensor var_3116_cast_fp16_0, tensor var_3116_cast_fp16_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = embed_41_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3118_cast_fp16 = mul(x = var_3116_cast_fp16_1, y = const_107_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; int32 var_3120 = const()[name = string("op_3120"), val = int32(-2)]; bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)]; tensor var_3121_cast_fp16 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118_cast_fp16, var_3116_cast_fp16_0))[name = string("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = mul(x = var_3121_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3122_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor embed_43_cast_fp16 = transpose(perm = embed_43_perm_0, x = var_3104_cast_fp16)[name = string("transpose_41")]; tensor var_3125_cast_fp16 = mul(x = embed_43_cast_fp16, y = cos_cast_fp16)[name = string("op_3125_cast_fp16")]; tensor var_3126_split_sizes_0 = const()[name = string("op_3126_split_sizes_0"), val = tensor([32, 32])]; int32 var_3126_axis_0 = const()[name = string("op_3126_axis_0"), val = int32(-1)]; tensor var_3126_cast_fp16_0, tensor var_3126_cast_fp16_1 = split(axis = var_3126_axis_0, split_sizes = var_3126_split_sizes_0, x = embed_43_cast_fp16)[name = string("op_3126_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3128_cast_fp16 = mul(x = var_3126_cast_fp16_1, y = const_108_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; bool var_3131_interleave_0 = const()[name = string("op_3131_interleave_0"), val = bool(false)]; tensor var_3131_cast_fp16 = concat(axis = var_3130, interleave = var_3131_interleave_0, values = (var_3128_cast_fp16, var_3126_cast_fp16_0))[name = string("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = mul(x = var_3131_cast_fp16, y = sin_cast_fp16)[name = string("op_3132_cast_fp16")]; tensor key_states_43_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3132_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor expand_dims_101 = const()[name = string("expand_dims_101"), val = tensor([10])]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([0])]; tensor expand_dims_104 = const()[name = string("expand_dims_104"), val = tensor([11])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_101, expand_dims_102, position_id, concat_83_values3_0))[name = string("concat_83")]; tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (expand_dims_104, concat_84_values1_0, var_426, concat_84_values3_0))[name = string("concat_84")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_186_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_186")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43_cast_fp16 = transpose(perm = value_states_43_perm_0, x = var_3111_cast_fp16)[name = string("transpose_40")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = value_states_43_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_187_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_187")]; tensor var_3175_begin_0 = const()[name = string("op_3175_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3175_end_0 = const()[name = string("op_3175_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3175_end_mask_0 = const()[name = string("op_3175_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3175_cast_fp16 = slice_by_index(begin = var_3175_begin_0, end = var_3175_end_0, end_mask = var_3175_end_mask_0, x = coreml_update_state_68)[name = string("op_3175_cast_fp16")]; tensor tile_20 = const()[name = string("tile_20"), val = tensor([1, 1])]; int32 var_3178_axis_0 = const()[name = string("op_3178_axis_0"), val = int32(1)]; tensor var_3178_cast_fp16_0, tensor var_3178_cast_fp16_1 = split(axis = var_3178_axis_0, split_sizes = tile_20, x = var_3175_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor var_3185_begin_0 = const()[name = string("op_3185_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3185_end_0 = const()[name = string("op_3185_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3185_end_mask_0 = const()[name = string("op_3185_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = coreml_update_state_69)[name = string("op_3185_cast_fp16")]; tensor tile_21 = const()[name = string("tile_21"), val = tensor([1, 1])]; int32 var_3188_axis_0 = const()[name = string("op_3188_axis_0"), val = int32(1)]; tensor var_3188_cast_fp16_0, tensor var_3188_cast_fp16_1 = split(axis = var_3188_axis_0, split_sizes = tile_21, x = var_3185_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor var_3191_split_sizes_0 = const()[name = string("op_3191_split_sizes_0"), val = tensor([8, 8])]; int32 var_3191_axis_0 = const()[name = string("op_3191_axis_0"), val = int32(1)]; tensor var_3191_cast_fp16_0, tensor var_3191_cast_fp16_1 = split(axis = var_3191_axis_0, split_sizes = var_3191_split_sizes_0, x = query_states_43_cast_fp16)[name = string("op_3191_cast_fp16")]; bool attn_weights_161_transpose_x_0 = const()[name = string("attn_weights_161_transpose_x_0"), val = bool(false)]; bool attn_weights_161_transpose_y_0 = const()[name = string("attn_weights_161_transpose_y_0"), val = bool(false)]; tensor attn_weights_161_cast_fp16 = matmul(transpose_x = attn_weights_161_transpose_x_0, transpose_y = attn_weights_161_transpose_y_0, x = var_3178_cast_fp16_0, y = var_3191_cast_fp16_0)[name = string("attn_weights_161_cast_fp16")]; fp16 _inversed_attn_weights_163_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_163_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_163_cast_fp16 = mul(x = attn_weights_161_cast_fp16, y = _inversed_attn_weights_163_y_0_to_fp16)[name = string("_inversed_attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = _inversed_attn_weights_163_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_165_cast_fp16")]; int32 var_3198 = const()[name = string("op_3198"), val = int32(2)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_3198, x = attn_weights_165_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool var_3204_transpose_x_1 = const()[name = string("op_3204_transpose_x_1"), val = bool(true)]; bool var_3204_transpose_y_1 = const()[name = string("op_3204_transpose_y_1"), val = bool(false)]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_1, transpose_y = var_3204_transpose_y_1, x = attn_weights_167_cast_fp16, y = var_3188_cast_fp16_0)[name = string("op_3204_cast_fp16")]; bool attn_weights_169_transpose_x_0 = const()[name = string("attn_weights_169_transpose_x_0"), val = bool(false)]; bool attn_weights_169_transpose_y_0 = const()[name = string("attn_weights_169_transpose_y_0"), val = bool(false)]; tensor attn_weights_169_cast_fp16 = matmul(transpose_x = attn_weights_169_transpose_x_0, transpose_y = attn_weights_169_transpose_y_0, x = var_3178_cast_fp16_1, y = var_3191_cast_fp16_1)[name = string("attn_weights_169_cast_fp16")]; fp16 _inversed_attn_weights_171_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_171_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_171_cast_fp16 = mul(x = attn_weights_169_cast_fp16, y = _inversed_attn_weights_171_y_0_to_fp16)[name = string("_inversed_attn_weights_171_cast_fp16")]; tensor attn_weights_173_cast_fp16 = add(x = _inversed_attn_weights_171_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_173_cast_fp16")]; int32 var_3210 = const()[name = string("op_3210"), val = int32(2)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_3210, x = attn_weights_173_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(true)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_175_cast_fp16, y = var_3188_cast_fp16_1)[name = string("attn_output_61_cast_fp16")]; int32 var_3218 = const()[name = string("op_3218"), val = int32(1)]; bool attn_output_63_interleave_0 = const()[name = string("attn_output_63_interleave_0"), val = bool(false)]; tensor attn_output_63_cast_fp16 = concat(axis = var_3218, interleave = attn_output_63_interleave_0, values = (var_3204_cast_fp16, attn_output_61_cast_fp16))[name = string("attn_output_63_cast_fp16")]; tensor var_3222_perm_0 = const()[name = string("op_3222_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([1, 1024, 1, 1])]; tensor var_3222_cast_fp16 = transpose(perm = var_3222_perm_0, x = attn_output_63_cast_fp16)[name = string("transpose_39")]; tensor x_185_cast_fp16 = reshape(shape = var_3227, x = var_3222_cast_fp16)[name = string("x_185_cast_fp16")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309958848)))]; tensor hidden_states_63_cast_fp16 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = var_3234_to_fp16, x = x_185_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_179_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(1)]; fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3249_cast_fp16 = mul(x = x_187_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_3249_cast_fp16")]; bool x_189_interleave_0 = const()[name = string("x_189_interleave_0"), val = bool(false)]; tensor x_189_cast_fp16 = concat(axis = var_3246, interleave = x_189_interleave_0, values = (x_187_cast_fp16, var_3249_cast_fp16))[name = string("x_189_cast_fp16")]; tensor out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor([1])]; fp16 var_3259_to_fp16 = const()[name = string("op_3259_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_3259_to_fp16, x = x_189_cast_fp16)[name = string("out_127_cast_fp16")]; tensor layer_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312056064)))]; tensor out_129_cast_fp16 = mul(x = out_127_cast_fp16, y = layer_layers_10_post_attention_layernorm_weight_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_3265_split_sizes_0 = const()[name = string("op_3265_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3265_axis_0 = const()[name = string("op_3265_axis_0"), val = int32(1)]; tensor var_3265_cast_fp16_0, tensor var_3265_cast_fp16_1 = split(axis = var_3265_axis_0, split_sizes = var_3265_split_sizes_0, x = out_129_cast_fp16)[name = string("op_3265_cast_fp16")]; string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312060224)))]; tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = var_3270_to_fp16, x = var_3265_cast_fp16_0)[name = string("input_21_cast_fp16")]; tensor var_3281_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_3281_cast_fp16")]; string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")]; tensor var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor([1, 1])]; tensor var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor([1, 1])]; int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)]; tensor var_3269_to_fp16 = const()[name = string("op_3269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320448896)))]; tensor var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = var_3269_to_fp16, x = var_3265_cast_fp16_0)[name = string("op_3286_cast_fp16")]; tensor x_195_cast_fp16 = mul(x = var_3281_cast_fp16, y = var_3286_cast_fp16)[name = string("x_195_cast_fp16")]; string hidden_states_65_pad_type_0 = const()[name = string("hidden_states_65_pad_type_0"), val = string("valid")]; tensor hidden_states_65_strides_0 = const()[name = string("hidden_states_65_strides_0"), val = tensor([1, 1])]; tensor hidden_states_65_pad_0 = const()[name = string("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_65_dilations_0 = const()[name = string("hidden_states_65_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_65_groups_0 = const()[name = string("hidden_states_65_groups_0"), val = int32(1)]; tensor var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328837568)))]; tensor hidden_states_65_cast_fp16 = conv(dilations = hidden_states_65_dilations_0, groups = hidden_states_65_groups_0, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = hidden_states_65_strides_0, weight = var_3268_to_fp16, x = x_195_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor x_197_cast_fp16 = add(x = x_187_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("x_197_cast_fp16")]; int32 var_3299 = const()[name = string("op_3299"), val = int32(1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3302_cast_fp16 = mul(x = x_197_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3302_cast_fp16")]; bool x_199_interleave_0 = const()[name = string("x_199_interleave_0"), val = bool(false)]; tensor x_199_cast_fp16 = concat(axis = var_3299, interleave = x_199_interleave_0, values = (x_197_cast_fp16, var_3302_cast_fp16))[name = string("x_199_cast_fp16")]; tensor out_133_axes_0 = const()[name = string("out_133_axes_0"), val = tensor([1])]; fp16 var_3312_to_fp16 = const()[name = string("op_3312_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_3312_to_fp16, x = x_199_cast_fp16)[name = string("out_133_cast_fp16")]; tensor layer_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337226240)))]; tensor out_135_cast_fp16 = mul(x = out_133_cast_fp16, y = layer_layers_11_input_layernorm_weight_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_3318_split_sizes_0 = const()[name = string("op_3318_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3318_axis_0 = const()[name = string("op_3318_axis_0"), val = int32(1)]; tensor var_3318_cast_fp16_0, tensor var_3318_cast_fp16_1 = split(axis = var_3318_axis_0, split_sizes = var_3318_split_sizes_0, x = out_135_cast_fp16)[name = string("op_3318_cast_fp16")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337230400)))]; tensor query_states_45_cast_fp16 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = var_3340_to_fp16, x = var_3318_cast_fp16_0)[name = string("query_states_45_cast_fp16")]; string key_states_45_pad_type_0 = const()[name = string("key_states_45_pad_type_0"), val = string("valid")]; tensor key_states_45_strides_0 = const()[name = string("key_states_45_strides_0"), val = tensor([1, 1])]; tensor key_states_45_pad_0 = const()[name = string("key_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_45_dilations_0 = const()[name = string("key_states_45_dilations_0"), val = tensor([1, 1])]; int32 key_states_45_groups_0 = const()[name = string("key_states_45_groups_0"), val = int32(1)]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339327616)))]; tensor key_states_45_cast_fp16 = conv(dilations = key_states_45_dilations_0, groups = key_states_45_groups_0, pad = key_states_45_pad_0, pad_type = key_states_45_pad_type_0, strides = key_states_45_strides_0, weight = var_3351_to_fp16, x = var_3318_cast_fp16_0)[name = string("key_states_45_cast_fp16")]; string value_states_45_pad_type_0 = const()[name = string("value_states_45_pad_type_0"), val = string("valid")]; tensor value_states_45_strides_0 = const()[name = string("value_states_45_strides_0"), val = tensor([1, 1])]; tensor value_states_45_pad_0 = const()[name = string("value_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_45_dilations_0 = const()[name = string("value_states_45_dilations_0"), val = tensor([1, 1])]; int32 value_states_45_groups_0 = const()[name = string("value_states_45_groups_0"), val = int32(1)]; tensor var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339589824)))]; tensor value_states_45_cast_fp16 = conv(dilations = value_states_45_dilations_0, groups = value_states_45_groups_0, pad = value_states_45_pad_0, pad_type = value_states_45_pad_type_0, strides = value_states_45_strides_0, weight = var_3362_to_fp16, x = var_3318_cast_fp16_0)[name = string("value_states_45_cast_fp16")]; tensor var_3370 = const()[name = string("op_3370"), val = tensor([1, 16, 64, 1])]; tensor embed_45_cast_fp16 = reshape(shape = var_3370, x = query_states_45_cast_fp16)[name = string("embed_45_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 2, 64, 1])]; tensor var_3375_cast_fp16 = reshape(shape = var_3374, x = key_states_45_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor embed_47_perm_0 = const()[name = string("embed_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 64, 1])]; tensor var_3382_cast_fp16 = reshape(shape = var_3381, x = value_states_45_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor value_states_47_perm_0 = const()[name = string("value_states_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3386_cast_fp16 = mul(x = embed_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3386_cast_fp16")]; tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([32, 32])]; int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-2)]; tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = embed_45_cast_fp16)[name = string("op_3387_cast_fp16")]; fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3389_cast_fp16 = mul(x = var_3387_cast_fp16_1, y = const_117_promoted_to_fp16)[name = string("op_3389_cast_fp16")]; int32 var_3391 = const()[name = string("op_3391"), val = int32(-2)]; bool var_3392_interleave_0 = const()[name = string("op_3392_interleave_0"), val = bool(false)]; tensor var_3392_cast_fp16 = concat(axis = var_3391, interleave = var_3392_interleave_0, values = (var_3389_cast_fp16, var_3387_cast_fp16_0))[name = string("op_3392_cast_fp16")]; tensor var_3393_cast_fp16 = mul(x = var_3392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3393_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_3386_cast_fp16, y = var_3393_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor embed_47_cast_fp16 = transpose(perm = embed_47_perm_0, x = var_3375_cast_fp16)[name = string("transpose_38")]; tensor var_3396_cast_fp16 = mul(x = embed_47_cast_fp16, y = cos_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397_split_sizes_0 = const()[name = string("op_3397_split_sizes_0"), val = tensor([32, 32])]; int32 var_3397_axis_0 = const()[name = string("op_3397_axis_0"), val = int32(-1)]; tensor var_3397_cast_fp16_0, tensor var_3397_cast_fp16_1 = split(axis = var_3397_axis_0, split_sizes = var_3397_split_sizes_0, x = embed_47_cast_fp16)[name = string("op_3397_cast_fp16")]; fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3399_cast_fp16 = mul(x = var_3397_cast_fp16_1, y = const_118_promoted_to_fp16)[name = string("op_3399_cast_fp16")]; int32 var_3401 = const()[name = string("op_3401"), val = int32(-1)]; bool var_3402_interleave_0 = const()[name = string("op_3402_interleave_0"), val = bool(false)]; tensor var_3402_cast_fp16 = concat(axis = var_3401, interleave = var_3402_interleave_0, values = (var_3399_cast_fp16, var_3397_cast_fp16_0))[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = mul(x = var_3402_cast_fp16, y = sin_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor key_states_47_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3403_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([11])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([12])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_111, expand_dims_112, position_id, concat_91_values3_0))[name = string("concat_91")]; tensor concat_92_values1_0 = const()[name = string("concat_92_values1_0"), val = tensor([0])]; tensor concat_92_values3_0 = const()[name = string("concat_92_values3_0"), val = tensor([0])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_114, concat_92_values1_0, var_426, concat_92_values3_0))[name = string("concat_92")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = key_states_47_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_188_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_188")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_47_cast_fp16 = transpose(perm = value_states_47_perm_0, x = var_3382_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = value_states_47_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_189_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_189")]; tensor var_3446_begin_0 = const()[name = string("op_3446_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3446_end_0 = const()[name = string("op_3446_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3446_end_mask_0 = const()[name = string("op_3446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3446_cast_fp16 = slice_by_index(begin = var_3446_begin_0, end = var_3446_end_0, end_mask = var_3446_end_mask_0, x = coreml_update_state_70)[name = string("op_3446_cast_fp16")]; tensor tile_22 = const()[name = string("tile_22"), val = tensor([1, 1])]; int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(1)]; tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = tile_22, x = var_3446_cast_fp16)[name = string("op_3449_cast_fp16")]; tensor var_3456_begin_0 = const()[name = string("op_3456_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3456_end_0 = const()[name = string("op_3456_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3456_end_mask_0 = const()[name = string("op_3456_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = coreml_update_state_71)[name = string("op_3456_cast_fp16")]; tensor tile_23 = const()[name = string("tile_23"), val = tensor([1, 1])]; int32 var_3459_axis_0 = const()[name = string("op_3459_axis_0"), val = int32(1)]; tensor var_3459_cast_fp16_0, tensor var_3459_cast_fp16_1 = split(axis = var_3459_axis_0, split_sizes = tile_23, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3462_split_sizes_0 = const()[name = string("op_3462_split_sizes_0"), val = tensor([8, 8])]; int32 var_3462_axis_0 = const()[name = string("op_3462_axis_0"), val = int32(1)]; tensor var_3462_cast_fp16_0, tensor var_3462_cast_fp16_1 = split(axis = var_3462_axis_0, split_sizes = var_3462_split_sizes_0, x = query_states_47_cast_fp16)[name = string("op_3462_cast_fp16")]; bool attn_weights_177_transpose_x_0 = const()[name = string("attn_weights_177_transpose_x_0"), val = bool(false)]; bool attn_weights_177_transpose_y_0 = const()[name = string("attn_weights_177_transpose_y_0"), val = bool(false)]; tensor attn_weights_177_cast_fp16 = matmul(transpose_x = attn_weights_177_transpose_x_0, transpose_y = attn_weights_177_transpose_y_0, x = var_3449_cast_fp16_0, y = var_3462_cast_fp16_0)[name = string("attn_weights_177_cast_fp16")]; fp16 _inversed_attn_weights_179_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_179_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_179_cast_fp16 = mul(x = attn_weights_177_cast_fp16, y = _inversed_attn_weights_179_y_0_to_fp16)[name = string("_inversed_attn_weights_179_cast_fp16")]; tensor attn_weights_181_cast_fp16 = add(x = _inversed_attn_weights_179_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_181_cast_fp16")]; int32 var_3469 = const()[name = string("op_3469"), val = int32(2)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_3469, x = attn_weights_181_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool var_3475_transpose_x_1 = const()[name = string("op_3475_transpose_x_1"), val = bool(true)]; bool var_3475_transpose_y_1 = const()[name = string("op_3475_transpose_y_1"), val = bool(false)]; tensor var_3475_cast_fp16 = matmul(transpose_x = var_3475_transpose_x_1, transpose_y = var_3475_transpose_y_1, x = attn_weights_183_cast_fp16, y = var_3459_cast_fp16_0)[name = string("op_3475_cast_fp16")]; bool attn_weights_185_transpose_x_0 = const()[name = string("attn_weights_185_transpose_x_0"), val = bool(false)]; bool attn_weights_185_transpose_y_0 = const()[name = string("attn_weights_185_transpose_y_0"), val = bool(false)]; tensor attn_weights_185_cast_fp16 = matmul(transpose_x = attn_weights_185_transpose_x_0, transpose_y = attn_weights_185_transpose_y_0, x = var_3449_cast_fp16_1, y = var_3462_cast_fp16_1)[name = string("attn_weights_185_cast_fp16")]; fp16 _inversed_attn_weights_187_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_187_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_187_cast_fp16 = mul(x = attn_weights_185_cast_fp16, y = _inversed_attn_weights_187_y_0_to_fp16)[name = string("_inversed_attn_weights_187_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = _inversed_attn_weights_187_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_189_cast_fp16")]; int32 var_3481 = const()[name = string("op_3481"), val = int32(2)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_3481, x = attn_weights_189_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_67_transpose_x_1 = const()[name = string("attn_output_67_transpose_x_1"), val = bool(true)]; bool attn_output_67_transpose_y_1 = const()[name = string("attn_output_67_transpose_y_1"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_1, transpose_y = attn_output_67_transpose_y_1, x = attn_weights_191_cast_fp16, y = var_3459_cast_fp16_1)[name = string("attn_output_67_cast_fp16")]; int32 var_3489 = const()[name = string("op_3489"), val = int32(1)]; bool attn_output_69_interleave_0 = const()[name = string("attn_output_69_interleave_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = concat(axis = var_3489, interleave = attn_output_69_interleave_0, values = (var_3475_cast_fp16, attn_output_67_cast_fp16))[name = string("attn_output_69_cast_fp16")]; tensor var_3493_perm_0 = const()[name = string("op_3493_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, 1024, 1, 1])]; tensor var_3493_cast_fp16 = transpose(perm = var_3493_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_36")]; tensor x_203_cast_fp16 = reshape(shape = var_3498, x = var_3493_cast_fp16)[name = string("x_203_cast_fp16")]; string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339852032)))]; tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = var_3505_to_fp16, x = x_203_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor x_205_cast_fp16 = add(x = x_197_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("x_205_cast_fp16")]; int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; fp16 const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3520_cast_fp16 = mul(x = x_205_cast_fp16, y = const_123_promoted_to_fp16)[name = string("op_3520_cast_fp16")]; bool x_207_interleave_0 = const()[name = string("x_207_interleave_0"), val = bool(false)]; tensor x_207_cast_fp16 = concat(axis = var_3517, interleave = x_207_interleave_0, values = (x_205_cast_fp16, var_3520_cast_fp16))[name = string("x_207_cast_fp16")]; tensor out_139_axes_0 = const()[name = string("out_139_axes_0"), val = tensor([1])]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_3530_to_fp16, x = x_207_cast_fp16)[name = string("out_139_cast_fp16")]; tensor layer_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341949248)))]; tensor out_141_cast_fp16 = mul(x = out_139_cast_fp16, y = layer_layers_11_post_attention_layernorm_weight_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_3536_split_sizes_0 = const()[name = string("op_3536_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3536_axis_0 = const()[name = string("op_3536_axis_0"), val = int32(1)]; tensor var_3536_cast_fp16_0, tensor var_3536_cast_fp16_1 = split(axis = var_3536_axis_0, split_sizes = var_3536_split_sizes_0, x = out_141_cast_fp16)[name = string("op_3536_cast_fp16")]; string input_23_pad_type_0 = const()[name = string("input_23_pad_type_0"), val = string("valid")]; tensor input_23_strides_0 = const()[name = string("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = string("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = string("input_23_dilations_0"), val = tensor([1, 1])]; int32 input_23_groups_0 = const()[name = string("input_23_groups_0"), val = int32(1)]; tensor var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341953408)))]; tensor input_23_cast_fp16 = conv(dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = var_3541_to_fp16, x = var_3536_cast_fp16_0)[name = string("input_23_cast_fp16")]; tensor var_3552_cast_fp16 = silu(x = input_23_cast_fp16)[name = string("op_3552_cast_fp16")]; string var_3557_pad_type_0 = const()[name = string("op_3557_pad_type_0"), val = string("valid")]; tensor var_3557_strides_0 = const()[name = string("op_3557_strides_0"), val = tensor([1, 1])]; tensor var_3557_pad_0 = const()[name = string("op_3557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_dilations_0 = const()[name = string("op_3557_dilations_0"), val = tensor([1, 1])]; int32 var_3557_groups_0 = const()[name = string("op_3557_groups_0"), val = int32(1)]; tensor var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350342080)))]; tensor var_3557_cast_fp16 = conv(dilations = var_3557_dilations_0, groups = var_3557_groups_0, pad = var_3557_pad_0, pad_type = var_3557_pad_type_0, strides = var_3557_strides_0, weight = var_3540_to_fp16, x = var_3536_cast_fp16_0)[name = string("op_3557_cast_fp16")]; tensor x_213_cast_fp16 = mul(x = var_3552_cast_fp16, y = var_3557_cast_fp16)[name = string("x_213_cast_fp16")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor var_3539_to_fp16 = const()[name = string("op_3539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358730752)))]; tensor hidden_states_71_cast_fp16 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = var_3539_to_fp16, x = x_213_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor x_215_cast_fp16 = add(x = x_205_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("x_215_cast_fp16")]; int32 var_3570 = const()[name = string("op_3570"), val = int32(1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3573_cast_fp16 = mul(x = x_215_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_3573_cast_fp16")]; bool x_217_interleave_0 = const()[name = string("x_217_interleave_0"), val = bool(false)]; tensor x_217_cast_fp16 = concat(axis = var_3570, interleave = x_217_interleave_0, values = (x_215_cast_fp16, var_3573_cast_fp16))[name = string("x_217_cast_fp16")]; tensor out_145_axes_0 = const()[name = string("out_145_axes_0"), val = tensor([1])]; fp16 var_3583_to_fp16 = const()[name = string("op_3583_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_145_cast_fp16 = layer_norm(axes = out_145_axes_0, epsilon = var_3583_to_fp16, x = x_217_cast_fp16)[name = string("out_145_cast_fp16")]; tensor layer_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367119424)))]; tensor out_147_cast_fp16 = mul(x = out_145_cast_fp16, y = layer_layers_12_input_layernorm_weight_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_3589_split_sizes_0 = const()[name = string("op_3589_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3589_axis_0 = const()[name = string("op_3589_axis_0"), val = int32(1)]; tensor var_3589_cast_fp16_0, tensor var_3589_cast_fp16_1 = split(axis = var_3589_axis_0, split_sizes = var_3589_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3589_cast_fp16")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor var_3611_to_fp16 = const()[name = string("op_3611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367123584)))]; tensor query_states_49_cast_fp16 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = var_3611_to_fp16, x = var_3589_cast_fp16_0)[name = string("query_states_49_cast_fp16")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor var_3622_to_fp16 = const()[name = string("op_3622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369220800)))]; tensor key_states_49_cast_fp16 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = var_3622_to_fp16, x = var_3589_cast_fp16_0)[name = string("key_states_49_cast_fp16")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor var_3633_to_fp16 = const()[name = string("op_3633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369483008)))]; tensor value_states_49_cast_fp16 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = var_3633_to_fp16, x = var_3589_cast_fp16_0)[name = string("value_states_49_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, 16, 64, 1])]; tensor embed_49_cast_fp16 = reshape(shape = var_3641, x = query_states_49_cast_fp16)[name = string("embed_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 2, 64, 1])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = key_states_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor embed_51_perm_0 = const()[name = string("embed_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3652 = const()[name = string("op_3652"), val = tensor([1, 2, 64, 1])]; tensor var_3653_cast_fp16 = reshape(shape = var_3652, x = value_states_49_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor value_states_51_perm_0 = const()[name = string("value_states_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3657_cast_fp16 = mul(x = embed_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3657_cast_fp16")]; tensor var_3658_split_sizes_0 = const()[name = string("op_3658_split_sizes_0"), val = tensor([32, 32])]; int32 var_3658_axis_0 = const()[name = string("op_3658_axis_0"), val = int32(-2)]; tensor var_3658_cast_fp16_0, tensor var_3658_cast_fp16_1 = split(axis = var_3658_axis_0, split_sizes = var_3658_split_sizes_0, x = embed_49_cast_fp16)[name = string("op_3658_cast_fp16")]; fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3660_cast_fp16 = mul(x = var_3658_cast_fp16_1, y = const_127_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; int32 var_3662 = const()[name = string("op_3662"), val = int32(-2)]; bool var_3663_interleave_0 = const()[name = string("op_3663_interleave_0"), val = bool(false)]; tensor var_3663_cast_fp16 = concat(axis = var_3662, interleave = var_3663_interleave_0, values = (var_3660_cast_fp16, var_3658_cast_fp16_0))[name = string("op_3663_cast_fp16")]; tensor var_3664_cast_fp16 = mul(x = var_3663_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_3657_cast_fp16, y = var_3664_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor embed_51_cast_fp16 = transpose(perm = embed_51_perm_0, x = var_3646_cast_fp16)[name = string("transpose_35")]; tensor var_3667_cast_fp16 = mul(x = embed_51_cast_fp16, y = cos_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor var_3668_split_sizes_0 = const()[name = string("op_3668_split_sizes_0"), val = tensor([32, 32])]; int32 var_3668_axis_0 = const()[name = string("op_3668_axis_0"), val = int32(-1)]; tensor var_3668_cast_fp16_0, tensor var_3668_cast_fp16_1 = split(axis = var_3668_axis_0, split_sizes = var_3668_split_sizes_0, x = embed_51_cast_fp16)[name = string("op_3668_cast_fp16")]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3670_cast_fp16 = mul(x = var_3668_cast_fp16_1, y = const_128_promoted_to_fp16)[name = string("op_3670_cast_fp16")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; bool var_3673_interleave_0 = const()[name = string("op_3673_interleave_0"), val = bool(false)]; tensor var_3673_cast_fp16 = concat(axis = var_3672, interleave = var_3673_interleave_0, values = (var_3670_cast_fp16, var_3668_cast_fp16_0))[name = string("op_3673_cast_fp16")]; tensor var_3674_cast_fp16 = mul(x = var_3673_cast_fp16, y = sin_cast_fp16)[name = string("op_3674_cast_fp16")]; tensor key_states_51_cast_fp16 = add(x = var_3667_cast_fp16, y = var_3674_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([12])]; tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([13])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_121, expand_dims_122, position_id, concat_99_values3_0))[name = string("concat_99")]; tensor concat_100_values1_0 = const()[name = string("concat_100_values1_0"), val = tensor([0])]; tensor concat_100_values3_0 = const()[name = string("concat_100_values3_0"), val = tensor([0])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (expand_dims_124, concat_100_values1_0, var_426, concat_100_values3_0))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = key_states_51_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_190_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_190")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51_cast_fp16 = transpose(perm = value_states_51_perm_0, x = var_3653_cast_fp16)[name = string("transpose_34")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = value_states_51_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_191_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_191")]; tensor var_3717_begin_0 = const()[name = string("op_3717_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3717_end_0 = const()[name = string("op_3717_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3717_end_mask_0 = const()[name = string("op_3717_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = coreml_update_state_72)[name = string("op_3717_cast_fp16")]; tensor tile_24 = const()[name = string("tile_24"), val = tensor([1, 1])]; int32 var_3720_axis_0 = const()[name = string("op_3720_axis_0"), val = int32(1)]; tensor var_3720_cast_fp16_0, tensor var_3720_cast_fp16_1 = split(axis = var_3720_axis_0, split_sizes = tile_24, x = var_3717_cast_fp16)[name = string("op_3720_cast_fp16")]; tensor var_3727_begin_0 = const()[name = string("op_3727_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3727_end_0 = const()[name = string("op_3727_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3727_end_mask_0 = const()[name = string("op_3727_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = coreml_update_state_73)[name = string("op_3727_cast_fp16")]; tensor tile_25 = const()[name = string("tile_25"), val = tensor([1, 1])]; int32 var_3730_axis_0 = const()[name = string("op_3730_axis_0"), val = int32(1)]; tensor var_3730_cast_fp16_0, tensor var_3730_cast_fp16_1 = split(axis = var_3730_axis_0, split_sizes = tile_25, x = var_3727_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor var_3733_split_sizes_0 = const()[name = string("op_3733_split_sizes_0"), val = tensor([8, 8])]; int32 var_3733_axis_0 = const()[name = string("op_3733_axis_0"), val = int32(1)]; tensor var_3733_cast_fp16_0, tensor var_3733_cast_fp16_1 = split(axis = var_3733_axis_0, split_sizes = var_3733_split_sizes_0, x = query_states_51_cast_fp16)[name = string("op_3733_cast_fp16")]; bool attn_weights_193_transpose_x_0 = const()[name = string("attn_weights_193_transpose_x_0"), val = bool(false)]; bool attn_weights_193_transpose_y_0 = const()[name = string("attn_weights_193_transpose_y_0"), val = bool(false)]; tensor attn_weights_193_cast_fp16 = matmul(transpose_x = attn_weights_193_transpose_x_0, transpose_y = attn_weights_193_transpose_y_0, x = var_3720_cast_fp16_0, y = var_3733_cast_fp16_0)[name = string("attn_weights_193_cast_fp16")]; fp16 _inversed_attn_weights_195_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_195_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_195_cast_fp16 = mul(x = attn_weights_193_cast_fp16, y = _inversed_attn_weights_195_y_0_to_fp16)[name = string("_inversed_attn_weights_195_cast_fp16")]; tensor attn_weights_197_cast_fp16 = add(x = _inversed_attn_weights_195_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_197_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(2)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_3740, x = attn_weights_197_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool var_3746_transpose_x_1 = const()[name = string("op_3746_transpose_x_1"), val = bool(true)]; bool var_3746_transpose_y_1 = const()[name = string("op_3746_transpose_y_1"), val = bool(false)]; tensor var_3746_cast_fp16 = matmul(transpose_x = var_3746_transpose_x_1, transpose_y = var_3746_transpose_y_1, x = attn_weights_199_cast_fp16, y = var_3730_cast_fp16_0)[name = string("op_3746_cast_fp16")]; bool attn_weights_201_transpose_x_0 = const()[name = string("attn_weights_201_transpose_x_0"), val = bool(false)]; bool attn_weights_201_transpose_y_0 = const()[name = string("attn_weights_201_transpose_y_0"), val = bool(false)]; tensor attn_weights_201_cast_fp16 = matmul(transpose_x = attn_weights_201_transpose_x_0, transpose_y = attn_weights_201_transpose_y_0, x = var_3720_cast_fp16_1, y = var_3733_cast_fp16_1)[name = string("attn_weights_201_cast_fp16")]; fp16 _inversed_attn_weights_203_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_203_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_203_cast_fp16 = mul(x = attn_weights_201_cast_fp16, y = _inversed_attn_weights_203_y_0_to_fp16)[name = string("_inversed_attn_weights_203_cast_fp16")]; tensor attn_weights_205_cast_fp16 = add(x = _inversed_attn_weights_203_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_205_cast_fp16")]; int32 var_3752 = const()[name = string("op_3752"), val = int32(2)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_3752, x = attn_weights_205_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(true)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_207_cast_fp16, y = var_3730_cast_fp16_1)[name = string("attn_output_73_cast_fp16")]; int32 var_3760 = const()[name = string("op_3760"), val = int32(1)]; bool attn_output_75_interleave_0 = const()[name = string("attn_output_75_interleave_0"), val = bool(false)]; tensor attn_output_75_cast_fp16 = concat(axis = var_3760, interleave = attn_output_75_interleave_0, values = (var_3746_cast_fp16, attn_output_73_cast_fp16))[name = string("attn_output_75_cast_fp16")]; tensor var_3764_perm_0 = const()[name = string("op_3764_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([1, 1024, 1, 1])]; tensor var_3764_cast_fp16 = transpose(perm = var_3764_perm_0, x = attn_output_75_cast_fp16)[name = string("transpose_33")]; tensor x_221_cast_fp16 = reshape(shape = var_3769, x = var_3764_cast_fp16)[name = string("x_221_cast_fp16")]; string hidden_states_75_pad_type_0 = const()[name = string("hidden_states_75_pad_type_0"), val = string("valid")]; tensor hidden_states_75_strides_0 = const()[name = string("hidden_states_75_strides_0"), val = tensor([1, 1])]; tensor hidden_states_75_pad_0 = const()[name = string("hidden_states_75_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_75_dilations_0 = const()[name = string("hidden_states_75_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_75_groups_0 = const()[name = string("hidden_states_75_groups_0"), val = int32(1)]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369745216)))]; tensor hidden_states_75_cast_fp16 = conv(dilations = hidden_states_75_dilations_0, groups = hidden_states_75_groups_0, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = hidden_states_75_strides_0, weight = var_3776_to_fp16, x = x_221_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor x_223_cast_fp16 = add(x = x_215_cast_fp16, y = hidden_states_75_cast_fp16)[name = string("x_223_cast_fp16")]; int32 var_3788 = const()[name = string("op_3788"), val = int32(1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3791_cast_fp16 = mul(x = x_223_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3791_cast_fp16")]; bool x_225_interleave_0 = const()[name = string("x_225_interleave_0"), val = bool(false)]; tensor x_225_cast_fp16 = concat(axis = var_3788, interleave = x_225_interleave_0, values = (x_223_cast_fp16, var_3791_cast_fp16))[name = string("x_225_cast_fp16")]; tensor out_151_axes_0 = const()[name = string("out_151_axes_0"), val = tensor([1])]; fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_151_cast_fp16 = layer_norm(axes = out_151_axes_0, epsilon = var_3801_to_fp16, x = x_225_cast_fp16)[name = string("out_151_cast_fp16")]; tensor layer_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371842432)))]; tensor out_153_cast_fp16 = mul(x = out_151_cast_fp16, y = layer_layers_12_post_attention_layernorm_weight_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3807_split_sizes_0 = const()[name = string("op_3807_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3807_axis_0 = const()[name = string("op_3807_axis_0"), val = int32(1)]; tensor var_3807_cast_fp16_0, tensor var_3807_cast_fp16_1 = split(axis = var_3807_axis_0, split_sizes = var_3807_split_sizes_0, x = out_153_cast_fp16)[name = string("op_3807_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371846592)))]; tensor input_25_cast_fp16 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = var_3812_to_fp16, x = var_3807_cast_fp16_0)[name = string("input_25_cast_fp16")]; tensor var_3823_cast_fp16 = silu(x = input_25_cast_fp16)[name = string("op_3823_cast_fp16")]; string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")]; tensor var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor([1, 1])]; tensor var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor([1, 1])]; int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)]; tensor var_3811_to_fp16 = const()[name = string("op_3811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380235264)))]; tensor var_3828_cast_fp16 = conv(dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = var_3811_to_fp16, x = var_3807_cast_fp16_0)[name = string("op_3828_cast_fp16")]; tensor x_231_cast_fp16 = mul(x = var_3823_cast_fp16, y = var_3828_cast_fp16)[name = string("x_231_cast_fp16")]; string hidden_states_77_pad_type_0 = const()[name = string("hidden_states_77_pad_type_0"), val = string("valid")]; tensor hidden_states_77_strides_0 = const()[name = string("hidden_states_77_strides_0"), val = tensor([1, 1])]; tensor hidden_states_77_pad_0 = const()[name = string("hidden_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_77_dilations_0 = const()[name = string("hidden_states_77_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_77_groups_0 = const()[name = string("hidden_states_77_groups_0"), val = int32(1)]; tensor var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388623936)))]; tensor hidden_states_77_cast_fp16 = conv(dilations = hidden_states_77_dilations_0, groups = hidden_states_77_groups_0, pad = hidden_states_77_pad_0, pad_type = hidden_states_77_pad_type_0, strides = hidden_states_77_strides_0, weight = var_3810_to_fp16, x = x_231_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_223_cast_fp16, y = hidden_states_77_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_3841 = const()[name = string("op_3841"), val = int32(1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x_233_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool x_235_interleave_0 = const()[name = string("x_235_interleave_0"), val = bool(false)]; tensor x_235_cast_fp16 = concat(axis = var_3841, interleave = x_235_interleave_0, values = (x_233_cast_fp16, var_3844_cast_fp16))[name = string("x_235_cast_fp16")]; tensor out_157_axes_0 = const()[name = string("out_157_axes_0"), val = tensor([1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_157_cast_fp16 = layer_norm(axes = out_157_axes_0, epsilon = var_3854_to_fp16, x = x_235_cast_fp16)[name = string("out_157_cast_fp16")]; tensor layer_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397012608)))]; tensor out_159_cast_fp16 = mul(x = out_157_cast_fp16, y = layer_layers_13_input_layernorm_weight_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_3860_split_sizes_0 = const()[name = string("op_3860_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3860_axis_0 = const()[name = string("op_3860_axis_0"), val = int32(1)]; tensor var_3860_cast_fp16_0, tensor var_3860_cast_fp16_1 = split(axis = var_3860_axis_0, split_sizes = var_3860_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3860_cast_fp16")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397016768)))]; tensor query_states_53_cast_fp16 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = var_3882_to_fp16, x = var_3860_cast_fp16_0)[name = string("query_states_53_cast_fp16")]; string key_states_53_pad_type_0 = const()[name = string("key_states_53_pad_type_0"), val = string("valid")]; tensor key_states_53_strides_0 = const()[name = string("key_states_53_strides_0"), val = tensor([1, 1])]; tensor key_states_53_pad_0 = const()[name = string("key_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_53_dilations_0 = const()[name = string("key_states_53_dilations_0"), val = tensor([1, 1])]; int32 key_states_53_groups_0 = const()[name = string("key_states_53_groups_0"), val = int32(1)]; tensor var_3893_to_fp16 = const()[name = string("op_3893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399113984)))]; tensor key_states_53_cast_fp16 = conv(dilations = key_states_53_dilations_0, groups = key_states_53_groups_0, pad = key_states_53_pad_0, pad_type = key_states_53_pad_type_0, strides = key_states_53_strides_0, weight = var_3893_to_fp16, x = var_3860_cast_fp16_0)[name = string("key_states_53_cast_fp16")]; string value_states_53_pad_type_0 = const()[name = string("value_states_53_pad_type_0"), val = string("valid")]; tensor value_states_53_strides_0 = const()[name = string("value_states_53_strides_0"), val = tensor([1, 1])]; tensor value_states_53_pad_0 = const()[name = string("value_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_53_dilations_0 = const()[name = string("value_states_53_dilations_0"), val = tensor([1, 1])]; int32 value_states_53_groups_0 = const()[name = string("value_states_53_groups_0"), val = int32(1)]; tensor var_3904_to_fp16 = const()[name = string("op_3904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399376192)))]; tensor value_states_53_cast_fp16 = conv(dilations = value_states_53_dilations_0, groups = value_states_53_groups_0, pad = value_states_53_pad_0, pad_type = value_states_53_pad_type_0, strides = value_states_53_strides_0, weight = var_3904_to_fp16, x = var_3860_cast_fp16_0)[name = string("value_states_53_cast_fp16")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([1, 16, 64, 1])]; tensor embed_53_cast_fp16 = reshape(shape = var_3912, x = query_states_53_cast_fp16)[name = string("embed_53_cast_fp16")]; tensor var_3916 = const()[name = string("op_3916"), val = tensor([1, 2, 64, 1])]; tensor var_3917_cast_fp16 = reshape(shape = var_3916, x = key_states_53_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor embed_55_perm_0 = const()[name = string("embed_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2, 64, 1])]; tensor var_3924_cast_fp16 = reshape(shape = var_3923, x = value_states_53_cast_fp16)[name = string("op_3924_cast_fp16")]; tensor value_states_55_perm_0 = const()[name = string("value_states_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3928_cast_fp16 = mul(x = embed_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor var_3929_split_sizes_0 = const()[name = string("op_3929_split_sizes_0"), val = tensor([32, 32])]; int32 var_3929_axis_0 = const()[name = string("op_3929_axis_0"), val = int32(-2)]; tensor var_3929_cast_fp16_0, tensor var_3929_cast_fp16_1 = split(axis = var_3929_axis_0, split_sizes = var_3929_split_sizes_0, x = embed_53_cast_fp16)[name = string("op_3929_cast_fp16")]; fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3931_cast_fp16 = mul(x = var_3929_cast_fp16_1, y = const_137_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; int32 var_3933 = const()[name = string("op_3933"), val = int32(-2)]; bool var_3934_interleave_0 = const()[name = string("op_3934_interleave_0"), val = bool(false)]; tensor var_3934_cast_fp16 = concat(axis = var_3933, interleave = var_3934_interleave_0, values = (var_3931_cast_fp16, var_3929_cast_fp16_0))[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = var_3934_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_3928_cast_fp16, y = var_3935_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor embed_55_cast_fp16 = transpose(perm = embed_55_perm_0, x = var_3917_cast_fp16)[name = string("transpose_32")]; tensor var_3938_cast_fp16 = mul(x = embed_55_cast_fp16, y = cos_cast_fp16)[name = string("op_3938_cast_fp16")]; tensor var_3939_split_sizes_0 = const()[name = string("op_3939_split_sizes_0"), val = tensor([32, 32])]; int32 var_3939_axis_0 = const()[name = string("op_3939_axis_0"), val = int32(-1)]; tensor var_3939_cast_fp16_0, tensor var_3939_cast_fp16_1 = split(axis = var_3939_axis_0, split_sizes = var_3939_split_sizes_0, x = embed_55_cast_fp16)[name = string("op_3939_cast_fp16")]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3941_cast_fp16 = mul(x = var_3939_cast_fp16_1, y = const_138_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; bool var_3944_interleave_0 = const()[name = string("op_3944_interleave_0"), val = bool(false)]; tensor var_3944_cast_fp16 = concat(axis = var_3943, interleave = var_3944_interleave_0, values = (var_3941_cast_fp16, var_3939_cast_fp16_0))[name = string("op_3944_cast_fp16")]; tensor var_3945_cast_fp16 = mul(x = var_3944_cast_fp16, y = sin_cast_fp16)[name = string("op_3945_cast_fp16")]; tensor key_states_55_cast_fp16 = add(x = var_3938_cast_fp16, y = var_3945_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor expand_dims_131 = const()[name = string("expand_dims_131"), val = tensor([13])]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_134 = const()[name = string("expand_dims_134"), val = tensor([14])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_131, expand_dims_132, position_id, concat_107_values3_0))[name = string("concat_107")]; tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (expand_dims_134, concat_108_values1_0, var_426, concat_108_values3_0))[name = string("concat_108")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = key_states_55_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_192_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_192")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_55_cast_fp16 = transpose(perm = value_states_55_perm_0, x = var_3924_cast_fp16)[name = string("transpose_31")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = value_states_55_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_193_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_193")]; tensor var_3988_begin_0 = const()[name = string("op_3988_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3988_end_0 = const()[name = string("op_3988_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3988_end_mask_0 = const()[name = string("op_3988_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = coreml_update_state_74)[name = string("op_3988_cast_fp16")]; tensor tile_26 = const()[name = string("tile_26"), val = tensor([1, 1])]; int32 var_3991_axis_0 = const()[name = string("op_3991_axis_0"), val = int32(1)]; tensor var_3991_cast_fp16_0, tensor var_3991_cast_fp16_1 = split(axis = var_3991_axis_0, split_sizes = tile_26, x = var_3988_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor var_3998_begin_0 = const()[name = string("op_3998_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3998_end_0 = const()[name = string("op_3998_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3998_end_mask_0 = const()[name = string("op_3998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3998_cast_fp16 = slice_by_index(begin = var_3998_begin_0, end = var_3998_end_0, end_mask = var_3998_end_mask_0, x = coreml_update_state_75)[name = string("op_3998_cast_fp16")]; tensor tile_27 = const()[name = string("tile_27"), val = tensor([1, 1])]; int32 var_4001_axis_0 = const()[name = string("op_4001_axis_0"), val = int32(1)]; tensor var_4001_cast_fp16_0, tensor var_4001_cast_fp16_1 = split(axis = var_4001_axis_0, split_sizes = tile_27, x = var_3998_cast_fp16)[name = string("op_4001_cast_fp16")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([8, 8])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(1)]; tensor var_4004_cast_fp16_0, tensor var_4004_cast_fp16_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = query_states_55_cast_fp16)[name = string("op_4004_cast_fp16")]; bool attn_weights_209_transpose_x_0 = const()[name = string("attn_weights_209_transpose_x_0"), val = bool(false)]; bool attn_weights_209_transpose_y_0 = const()[name = string("attn_weights_209_transpose_y_0"), val = bool(false)]; tensor attn_weights_209_cast_fp16 = matmul(transpose_x = attn_weights_209_transpose_x_0, transpose_y = attn_weights_209_transpose_y_0, x = var_3991_cast_fp16_0, y = var_4004_cast_fp16_0)[name = string("attn_weights_209_cast_fp16")]; fp16 _inversed_attn_weights_211_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_211_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_211_cast_fp16 = mul(x = attn_weights_209_cast_fp16, y = _inversed_attn_weights_211_y_0_to_fp16)[name = string("_inversed_attn_weights_211_cast_fp16")]; tensor attn_weights_213_cast_fp16 = add(x = _inversed_attn_weights_211_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_213_cast_fp16")]; int32 var_4011 = const()[name = string("op_4011"), val = int32(2)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_4011, x = attn_weights_213_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool var_4017_transpose_x_1 = const()[name = string("op_4017_transpose_x_1"), val = bool(true)]; bool var_4017_transpose_y_1 = const()[name = string("op_4017_transpose_y_1"), val = bool(false)]; tensor var_4017_cast_fp16 = matmul(transpose_x = var_4017_transpose_x_1, transpose_y = var_4017_transpose_y_1, x = attn_weights_215_cast_fp16, y = var_4001_cast_fp16_0)[name = string("op_4017_cast_fp16")]; bool attn_weights_217_transpose_x_0 = const()[name = string("attn_weights_217_transpose_x_0"), val = bool(false)]; bool attn_weights_217_transpose_y_0 = const()[name = string("attn_weights_217_transpose_y_0"), val = bool(false)]; tensor attn_weights_217_cast_fp16 = matmul(transpose_x = attn_weights_217_transpose_x_0, transpose_y = attn_weights_217_transpose_y_0, x = var_3991_cast_fp16_1, y = var_4004_cast_fp16_1)[name = string("attn_weights_217_cast_fp16")]; fp16 _inversed_attn_weights_219_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_219_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_219_cast_fp16 = mul(x = attn_weights_217_cast_fp16, y = _inversed_attn_weights_219_y_0_to_fp16)[name = string("_inversed_attn_weights_219_cast_fp16")]; tensor attn_weights_221_cast_fp16 = add(x = _inversed_attn_weights_219_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_221_cast_fp16")]; int32 var_4023 = const()[name = string("op_4023"), val = int32(2)]; tensor attn_weights_223_cast_fp16 = softmax(axis = var_4023, x = attn_weights_221_cast_fp16)[name = string("attn_weights_223_cast_fp16")]; bool attn_output_79_transpose_x_1 = const()[name = string("attn_output_79_transpose_x_1"), val = bool(true)]; bool attn_output_79_transpose_y_1 = const()[name = string("attn_output_79_transpose_y_1"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_1, transpose_y = attn_output_79_transpose_y_1, x = attn_weights_223_cast_fp16, y = var_4001_cast_fp16_1)[name = string("attn_output_79_cast_fp16")]; int32 var_4031 = const()[name = string("op_4031"), val = int32(1)]; bool attn_output_81_interleave_0 = const()[name = string("attn_output_81_interleave_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = concat(axis = var_4031, interleave = attn_output_81_interleave_0, values = (var_4017_cast_fp16, attn_output_79_cast_fp16))[name = string("attn_output_81_cast_fp16")]; tensor var_4035_perm_0 = const()[name = string("op_4035_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 1024, 1, 1])]; tensor var_4035_cast_fp16 = transpose(perm = var_4035_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_30")]; tensor x_239_cast_fp16 = reshape(shape = var_4040, x = var_4035_cast_fp16)[name = string("x_239_cast_fp16")]; string hidden_states_81_pad_type_0 = const()[name = string("hidden_states_81_pad_type_0"), val = string("valid")]; tensor hidden_states_81_strides_0 = const()[name = string("hidden_states_81_strides_0"), val = tensor([1, 1])]; tensor hidden_states_81_pad_0 = const()[name = string("hidden_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_81_dilations_0 = const()[name = string("hidden_states_81_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_81_groups_0 = const()[name = string("hidden_states_81_groups_0"), val = int32(1)]; tensor var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399638400)))]; tensor hidden_states_81_cast_fp16 = conv(dilations = hidden_states_81_dilations_0, groups = hidden_states_81_groups_0, pad = hidden_states_81_pad_0, pad_type = hidden_states_81_pad_type_0, strides = hidden_states_81_strides_0, weight = var_4047_to_fp16, x = x_239_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_81_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(1)]; fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4062_cast_fp16 = mul(x = x_241_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; bool x_243_interleave_0 = const()[name = string("x_243_interleave_0"), val = bool(false)]; tensor x_243_cast_fp16 = concat(axis = var_4059, interleave = x_243_interleave_0, values = (x_241_cast_fp16, var_4062_cast_fp16))[name = string("x_243_cast_fp16")]; tensor out_163_axes_0 = const()[name = string("out_163_axes_0"), val = tensor([1])]; fp16 var_4072_to_fp16 = const()[name = string("op_4072_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_163_cast_fp16 = layer_norm(axes = out_163_axes_0, epsilon = var_4072_to_fp16, x = x_243_cast_fp16)[name = string("out_163_cast_fp16")]; tensor layer_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401735616)))]; tensor out_165_cast_fp16 = mul(x = out_163_cast_fp16, y = layer_layers_13_post_attention_layernorm_weight_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_4078_split_sizes_0 = const()[name = string("op_4078_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4078_axis_0 = const()[name = string("op_4078_axis_0"), val = int32(1)]; tensor var_4078_cast_fp16_0, tensor var_4078_cast_fp16_1 = split(axis = var_4078_axis_0, split_sizes = var_4078_split_sizes_0, x = out_165_cast_fp16)[name = string("op_4078_cast_fp16")]; string input_27_pad_type_0 = const()[name = string("input_27_pad_type_0"), val = string("valid")]; tensor input_27_strides_0 = const()[name = string("input_27_strides_0"), val = tensor([1, 1])]; tensor input_27_pad_0 = const()[name = string("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_27_dilations_0 = const()[name = string("input_27_dilations_0"), val = tensor([1, 1])]; int32 input_27_groups_0 = const()[name = string("input_27_groups_0"), val = int32(1)]; tensor var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401739776)))]; tensor input_27_cast_fp16 = conv(dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = var_4083_to_fp16, x = var_4078_cast_fp16_0)[name = string("input_27_cast_fp16")]; tensor var_4094_cast_fp16 = silu(x = input_27_cast_fp16)[name = string("op_4094_cast_fp16")]; string var_4099_pad_type_0 = const()[name = string("op_4099_pad_type_0"), val = string("valid")]; tensor var_4099_strides_0 = const()[name = string("op_4099_strides_0"), val = tensor([1, 1])]; tensor var_4099_pad_0 = const()[name = string("op_4099_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4099_dilations_0 = const()[name = string("op_4099_dilations_0"), val = tensor([1, 1])]; int32 var_4099_groups_0 = const()[name = string("op_4099_groups_0"), val = int32(1)]; tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410128448)))]; tensor var_4099_cast_fp16 = conv(dilations = var_4099_dilations_0, groups = var_4099_groups_0, pad = var_4099_pad_0, pad_type = var_4099_pad_type_0, strides = var_4099_strides_0, weight = var_4082_to_fp16, x = var_4078_cast_fp16_0)[name = string("op_4099_cast_fp16")]; tensor x_249_cast_fp16 = mul(x = var_4094_cast_fp16, y = var_4099_cast_fp16)[name = string("x_249_cast_fp16")]; string hidden_states_83_pad_type_0 = const()[name = string("hidden_states_83_pad_type_0"), val = string("valid")]; tensor hidden_states_83_strides_0 = const()[name = string("hidden_states_83_strides_0"), val = tensor([1, 1])]; tensor hidden_states_83_pad_0 = const()[name = string("hidden_states_83_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_83_dilations_0 = const()[name = string("hidden_states_83_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_83_groups_0 = const()[name = string("hidden_states_83_groups_0"), val = int32(1)]; tensor var_4081_to_fp16 = const()[name = string("op_4081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418517120)))]; tensor hidden_states_83_cast_fp16 = conv(dilations = hidden_states_83_dilations_0, groups = hidden_states_83_groups_0, pad = hidden_states_83_pad_0, pad_type = hidden_states_83_pad_type_0, strides = hidden_states_83_strides_0, weight = var_4081_to_fp16, x = x_249_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_4112 = const()[name = string("op_4112"), val = int32(1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4115_cast_fp16 = mul(x = x_251_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4115_cast_fp16")]; bool x_253_interleave_0 = const()[name = string("x_253_interleave_0"), val = bool(false)]; tensor x_253_cast_fp16 = concat(axis = var_4112, interleave = x_253_interleave_0, values = (x_251_cast_fp16, var_4115_cast_fp16))[name = string("x_253_cast_fp16")]; tensor out_169_axes_0 = const()[name = string("out_169_axes_0"), val = tensor([1])]; fp16 var_4125_to_fp16 = const()[name = string("op_4125_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_169_cast_fp16 = layer_norm(axes = out_169_axes_0, epsilon = var_4125_to_fp16, x = x_253_cast_fp16)[name = string("out_169_cast_fp16")]; tensor layer_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426905792)))]; tensor out_171_cast_fp16 = mul(x = out_169_cast_fp16, y = layer_layers_14_input_layernorm_weight_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_4131_split_sizes_0 = const()[name = string("op_4131_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4131_axis_0 = const()[name = string("op_4131_axis_0"), val = int32(1)]; tensor var_4131_cast_fp16_0, tensor var_4131_cast_fp16_1 = split(axis = var_4131_axis_0, split_sizes = var_4131_split_sizes_0, x = out_171_cast_fp16)[name = string("op_4131_cast_fp16")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426909952)))]; tensor query_states_57_cast_fp16 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = var_4153_to_fp16, x = var_4131_cast_fp16_0)[name = string("query_states_57_cast_fp16")]; string key_states_57_pad_type_0 = const()[name = string("key_states_57_pad_type_0"), val = string("valid")]; tensor key_states_57_strides_0 = const()[name = string("key_states_57_strides_0"), val = tensor([1, 1])]; tensor key_states_57_pad_0 = const()[name = string("key_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_57_dilations_0 = const()[name = string("key_states_57_dilations_0"), val = tensor([1, 1])]; int32 key_states_57_groups_0 = const()[name = string("key_states_57_groups_0"), val = int32(1)]; tensor var_4164_to_fp16 = const()[name = string("op_4164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429007168)))]; tensor key_states_57_cast_fp16 = conv(dilations = key_states_57_dilations_0, groups = key_states_57_groups_0, pad = key_states_57_pad_0, pad_type = key_states_57_pad_type_0, strides = key_states_57_strides_0, weight = var_4164_to_fp16, x = var_4131_cast_fp16_0)[name = string("key_states_57_cast_fp16")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor var_4175_to_fp16 = const()[name = string("op_4175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429269376)))]; tensor value_states_57_cast_fp16 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = var_4175_to_fp16, x = var_4131_cast_fp16_0)[name = string("value_states_57_cast_fp16")]; tensor var_4183 = const()[name = string("op_4183"), val = tensor([1, 16, 64, 1])]; tensor embed_57_cast_fp16 = reshape(shape = var_4183, x = query_states_57_cast_fp16)[name = string("embed_57_cast_fp16")]; tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 2, 64, 1])]; tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = key_states_57_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor embed_59_perm_0 = const()[name = string("embed_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4194 = const()[name = string("op_4194"), val = tensor([1, 2, 64, 1])]; tensor var_4195_cast_fp16 = reshape(shape = var_4194, x = value_states_57_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor value_states_59_perm_0 = const()[name = string("value_states_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4199_cast_fp16 = mul(x = embed_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor var_4200_split_sizes_0 = const()[name = string("op_4200_split_sizes_0"), val = tensor([32, 32])]; int32 var_4200_axis_0 = const()[name = string("op_4200_axis_0"), val = int32(-2)]; tensor var_4200_cast_fp16_0, tensor var_4200_cast_fp16_1 = split(axis = var_4200_axis_0, split_sizes = var_4200_split_sizes_0, x = embed_57_cast_fp16)[name = string("op_4200_cast_fp16")]; fp16 const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4202_cast_fp16 = mul(x = var_4200_cast_fp16_1, y = const_147_promoted_to_fp16)[name = string("op_4202_cast_fp16")]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-2)]; bool var_4205_interleave_0 = const()[name = string("op_4205_interleave_0"), val = bool(false)]; tensor var_4205_cast_fp16 = concat(axis = var_4204, interleave = var_4205_interleave_0, values = (var_4202_cast_fp16, var_4200_cast_fp16_0))[name = string("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = mul(x = var_4205_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4206_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4206_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor embed_59_cast_fp16 = transpose(perm = embed_59_perm_0, x = var_4188_cast_fp16)[name = string("transpose_29")]; tensor var_4209_cast_fp16 = mul(x = embed_59_cast_fp16, y = cos_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor var_4210_split_sizes_0 = const()[name = string("op_4210_split_sizes_0"), val = tensor([32, 32])]; int32 var_4210_axis_0 = const()[name = string("op_4210_axis_0"), val = int32(-1)]; tensor var_4210_cast_fp16_0, tensor var_4210_cast_fp16_1 = split(axis = var_4210_axis_0, split_sizes = var_4210_split_sizes_0, x = embed_59_cast_fp16)[name = string("op_4210_cast_fp16")]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4212_cast_fp16 = mul(x = var_4210_cast_fp16_1, y = const_148_promoted_to_fp16)[name = string("op_4212_cast_fp16")]; int32 var_4214 = const()[name = string("op_4214"), val = int32(-1)]; bool var_4215_interleave_0 = const()[name = string("op_4215_interleave_0"), val = bool(false)]; tensor var_4215_cast_fp16 = concat(axis = var_4214, interleave = var_4215_interleave_0, values = (var_4212_cast_fp16, var_4210_cast_fp16_0))[name = string("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = mul(x = var_4215_cast_fp16, y = sin_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor key_states_59_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4216_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([14])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([0])]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([15])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_141, expand_dims_142, position_id, concat_115_values3_0))[name = string("concat_115")]; tensor concat_116_values1_0 = const()[name = string("concat_116_values1_0"), val = tensor([0])]; tensor concat_116_values3_0 = const()[name = string("concat_116_values3_0"), val = tensor([0])]; int32 concat_116_axis_0 = const()[name = string("concat_116_axis_0"), val = int32(0)]; bool concat_116_interleave_0 = const()[name = string("concat_116_interleave_0"), val = bool(false)]; tensor concat_116 = concat(axis = concat_116_axis_0, interleave = concat_116_interleave_0, values = (expand_dims_144, concat_116_values1_0, var_426, concat_116_values3_0))[name = string("concat_116")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = key_states_59_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_194_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_194")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59_cast_fp16 = transpose(perm = value_states_59_perm_0, x = var_4195_cast_fp16)[name = string("transpose_28")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = value_states_59_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_195_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_195")]; tensor var_4259_begin_0 = const()[name = string("op_4259_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4259_end_0 = const()[name = string("op_4259_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4259_end_mask_0 = const()[name = string("op_4259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4259_cast_fp16 = slice_by_index(begin = var_4259_begin_0, end = var_4259_end_0, end_mask = var_4259_end_mask_0, x = coreml_update_state_76)[name = string("op_4259_cast_fp16")]; tensor tile_28 = const()[name = string("tile_28"), val = tensor([1, 1])]; int32 var_4262_axis_0 = const()[name = string("op_4262_axis_0"), val = int32(1)]; tensor var_4262_cast_fp16_0, tensor var_4262_cast_fp16_1 = split(axis = var_4262_axis_0, split_sizes = tile_28, x = var_4259_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor var_4269_begin_0 = const()[name = string("op_4269_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = string("op_4269_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4269_end_mask_0 = const()[name = string("op_4269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = coreml_update_state_77)[name = string("op_4269_cast_fp16")]; tensor tile_29 = const()[name = string("tile_29"), val = tensor([1, 1])]; int32 var_4272_axis_0 = const()[name = string("op_4272_axis_0"), val = int32(1)]; tensor var_4272_cast_fp16_0, tensor var_4272_cast_fp16_1 = split(axis = var_4272_axis_0, split_sizes = tile_29, x = var_4269_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4275_split_sizes_0 = const()[name = string("op_4275_split_sizes_0"), val = tensor([8, 8])]; int32 var_4275_axis_0 = const()[name = string("op_4275_axis_0"), val = int32(1)]; tensor var_4275_cast_fp16_0, tensor var_4275_cast_fp16_1 = split(axis = var_4275_axis_0, split_sizes = var_4275_split_sizes_0, x = query_states_59_cast_fp16)[name = string("op_4275_cast_fp16")]; bool attn_weights_225_transpose_x_0 = const()[name = string("attn_weights_225_transpose_x_0"), val = bool(false)]; bool attn_weights_225_transpose_y_0 = const()[name = string("attn_weights_225_transpose_y_0"), val = bool(false)]; tensor attn_weights_225_cast_fp16 = matmul(transpose_x = attn_weights_225_transpose_x_0, transpose_y = attn_weights_225_transpose_y_0, x = var_4262_cast_fp16_0, y = var_4275_cast_fp16_0)[name = string("attn_weights_225_cast_fp16")]; fp16 _inversed_attn_weights_227_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_227_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_227_cast_fp16 = mul(x = attn_weights_225_cast_fp16, y = _inversed_attn_weights_227_y_0_to_fp16)[name = string("_inversed_attn_weights_227_cast_fp16")]; tensor attn_weights_229_cast_fp16 = add(x = _inversed_attn_weights_227_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_229_cast_fp16")]; int32 var_4282 = const()[name = string("op_4282"), val = int32(2)]; tensor attn_weights_231_cast_fp16 = softmax(axis = var_4282, x = attn_weights_229_cast_fp16)[name = string("attn_weights_231_cast_fp16")]; bool var_4288_transpose_x_1 = const()[name = string("op_4288_transpose_x_1"), val = bool(true)]; bool var_4288_transpose_y_1 = const()[name = string("op_4288_transpose_y_1"), val = bool(false)]; tensor var_4288_cast_fp16 = matmul(transpose_x = var_4288_transpose_x_1, transpose_y = var_4288_transpose_y_1, x = attn_weights_231_cast_fp16, y = var_4272_cast_fp16_0)[name = string("op_4288_cast_fp16")]; bool attn_weights_233_transpose_x_0 = const()[name = string("attn_weights_233_transpose_x_0"), val = bool(false)]; bool attn_weights_233_transpose_y_0 = const()[name = string("attn_weights_233_transpose_y_0"), val = bool(false)]; tensor attn_weights_233_cast_fp16 = matmul(transpose_x = attn_weights_233_transpose_x_0, transpose_y = attn_weights_233_transpose_y_0, x = var_4262_cast_fp16_1, y = var_4275_cast_fp16_1)[name = string("attn_weights_233_cast_fp16")]; fp16 _inversed_attn_weights_235_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_235_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_235_cast_fp16 = mul(x = attn_weights_233_cast_fp16, y = _inversed_attn_weights_235_y_0_to_fp16)[name = string("_inversed_attn_weights_235_cast_fp16")]; tensor attn_weights_237_cast_fp16 = add(x = _inversed_attn_weights_235_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_237_cast_fp16")]; int32 var_4294 = const()[name = string("op_4294"), val = int32(2)]; tensor attn_weights_239_cast_fp16 = softmax(axis = var_4294, x = attn_weights_237_cast_fp16)[name = string("attn_weights_239_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(true)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_239_cast_fp16, y = var_4272_cast_fp16_1)[name = string("attn_output_85_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(1)]; bool attn_output_87_interleave_0 = const()[name = string("attn_output_87_interleave_0"), val = bool(false)]; tensor attn_output_87_cast_fp16 = concat(axis = var_4302, interleave = attn_output_87_interleave_0, values = (var_4288_cast_fp16, attn_output_85_cast_fp16))[name = string("attn_output_87_cast_fp16")]; tensor var_4306_perm_0 = const()[name = string("op_4306_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4311 = const()[name = string("op_4311"), val = tensor([1, 1024, 1, 1])]; tensor var_4306_cast_fp16 = transpose(perm = var_4306_perm_0, x = attn_output_87_cast_fp16)[name = string("transpose_27")]; tensor x_257_cast_fp16 = reshape(shape = var_4311, x = var_4306_cast_fp16)[name = string("x_257_cast_fp16")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429531584)))]; tensor hidden_states_87_cast_fp16 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = var_4318_to_fp16, x = x_257_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_251_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(1)]; fp16 const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4333_cast_fp16 = mul(x = x_259_cast_fp16, y = const_153_promoted_to_fp16)[name = string("op_4333_cast_fp16")]; bool x_261_interleave_0 = const()[name = string("x_261_interleave_0"), val = bool(false)]; tensor x_261_cast_fp16 = concat(axis = var_4330, interleave = x_261_interleave_0, values = (x_259_cast_fp16, var_4333_cast_fp16))[name = string("x_261_cast_fp16")]; tensor out_175_axes_0 = const()[name = string("out_175_axes_0"), val = tensor([1])]; fp16 var_4343_to_fp16 = const()[name = string("op_4343_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_175_cast_fp16 = layer_norm(axes = out_175_axes_0, epsilon = var_4343_to_fp16, x = x_261_cast_fp16)[name = string("out_175_cast_fp16")]; tensor layer_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431628800)))]; tensor out_177_cast_fp16 = mul(x = out_175_cast_fp16, y = layer_layers_14_post_attention_layernorm_weight_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_4349_split_sizes_0 = const()[name = string("op_4349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4349_axis_0 = const()[name = string("op_4349_axis_0"), val = int32(1)]; tensor var_4349_cast_fp16_0, tensor var_4349_cast_fp16_1 = split(axis = var_4349_axis_0, split_sizes = var_4349_split_sizes_0, x = out_177_cast_fp16)[name = string("op_4349_cast_fp16")]; string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431632960)))]; tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = var_4354_to_fp16, x = var_4349_cast_fp16_0)[name = string("input_29_cast_fp16")]; tensor var_4365_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_4365_cast_fp16")]; string var_4370_pad_type_0 = const()[name = string("op_4370_pad_type_0"), val = string("valid")]; tensor var_4370_strides_0 = const()[name = string("op_4370_strides_0"), val = tensor([1, 1])]; tensor var_4370_pad_0 = const()[name = string("op_4370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4370_dilations_0 = const()[name = string("op_4370_dilations_0"), val = tensor([1, 1])]; int32 var_4370_groups_0 = const()[name = string("op_4370_groups_0"), val = int32(1)]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440021632)))]; tensor var_4370_cast_fp16 = conv(dilations = var_4370_dilations_0, groups = var_4370_groups_0, pad = var_4370_pad_0, pad_type = var_4370_pad_type_0, strides = var_4370_strides_0, weight = var_4353_to_fp16, x = var_4349_cast_fp16_0)[name = string("op_4370_cast_fp16")]; tensor x_267_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4370_cast_fp16)[name = string("x_267_cast_fp16")]; string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; tensor var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448410304)))]; tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = var_4352_to_fp16, x = x_267_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor x_269_cast_fp16 = add(x = x_259_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("x_269_cast_fp16")]; int32 var_4383 = const()[name = string("op_4383"), val = int32(1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool x_271_interleave_0 = const()[name = string("x_271_interleave_0"), val = bool(false)]; tensor x_271_cast_fp16 = concat(axis = var_4383, interleave = x_271_interleave_0, values = (x_269_cast_fp16, var_4386_cast_fp16))[name = string("x_271_cast_fp16")]; tensor out_181_axes_0 = const()[name = string("out_181_axes_0"), val = tensor([1])]; fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_181_cast_fp16 = layer_norm(axes = out_181_axes_0, epsilon = var_4396_to_fp16, x = x_271_cast_fp16)[name = string("out_181_cast_fp16")]; tensor layer_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456798976)))]; tensor out_183_cast_fp16 = mul(x = out_181_cast_fp16, y = layer_layers_15_input_layernorm_weight_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_4402_split_sizes_0 = const()[name = string("op_4402_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4402_axis_0 = const()[name = string("op_4402_axis_0"), val = int32(1)]; tensor var_4402_cast_fp16_0, tensor var_4402_cast_fp16_1 = split(axis = var_4402_axis_0, split_sizes = var_4402_split_sizes_0, x = out_183_cast_fp16)[name = string("op_4402_cast_fp16")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456803136)))]; tensor query_states_61_cast_fp16 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = var_4424_to_fp16, x = var_4402_cast_fp16_0)[name = string("query_states_61_cast_fp16")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458900352)))]; tensor key_states_61_cast_fp16 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = var_4435_to_fp16, x = var_4402_cast_fp16_0)[name = string("key_states_61_cast_fp16")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459162560)))]; tensor value_states_61_cast_fp16 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = var_4446_to_fp16, x = var_4402_cast_fp16_0)[name = string("value_states_61_cast_fp16")]; tensor var_4454 = const()[name = string("op_4454"), val = tensor([1, 16, 64, 1])]; tensor embed_61_cast_fp16 = reshape(shape = var_4454, x = query_states_61_cast_fp16)[name = string("embed_61_cast_fp16")]; tensor var_4458 = const()[name = string("op_4458"), val = tensor([1, 2, 64, 1])]; tensor var_4459_cast_fp16 = reshape(shape = var_4458, x = key_states_61_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor embed_63_perm_0 = const()[name = string("embed_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4465 = const()[name = string("op_4465"), val = tensor([1, 2, 64, 1])]; tensor var_4466_cast_fp16 = reshape(shape = var_4465, x = value_states_61_cast_fp16)[name = string("op_4466_cast_fp16")]; tensor value_states_63_perm_0 = const()[name = string("value_states_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4470_cast_fp16 = mul(x = embed_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4471_split_sizes_0 = const()[name = string("op_4471_split_sizes_0"), val = tensor([32, 32])]; int32 var_4471_axis_0 = const()[name = string("op_4471_axis_0"), val = int32(-2)]; tensor var_4471_cast_fp16_0, tensor var_4471_cast_fp16_1 = split(axis = var_4471_axis_0, split_sizes = var_4471_split_sizes_0, x = embed_61_cast_fp16)[name = string("op_4471_cast_fp16")]; fp16 const_157_promoted_to_fp16 = const()[name = string("const_157_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4473_cast_fp16 = mul(x = var_4471_cast_fp16_1, y = const_157_promoted_to_fp16)[name = string("op_4473_cast_fp16")]; int32 var_4475 = const()[name = string("op_4475"), val = int32(-2)]; bool var_4476_interleave_0 = const()[name = string("op_4476_interleave_0"), val = bool(false)]; tensor var_4476_cast_fp16 = concat(axis = var_4475, interleave = var_4476_interleave_0, values = (var_4473_cast_fp16, var_4471_cast_fp16_0))[name = string("op_4476_cast_fp16")]; tensor var_4477_cast_fp16 = mul(x = var_4476_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4477_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_4470_cast_fp16, y = var_4477_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor embed_63_cast_fp16 = transpose(perm = embed_63_perm_0, x = var_4459_cast_fp16)[name = string("transpose_26")]; tensor var_4480_cast_fp16 = mul(x = embed_63_cast_fp16, y = cos_cast_fp16)[name = string("op_4480_cast_fp16")]; tensor var_4481_split_sizes_0 = const()[name = string("op_4481_split_sizes_0"), val = tensor([32, 32])]; int32 var_4481_axis_0 = const()[name = string("op_4481_axis_0"), val = int32(-1)]; tensor var_4481_cast_fp16_0, tensor var_4481_cast_fp16_1 = split(axis = var_4481_axis_0, split_sizes = var_4481_split_sizes_0, x = embed_63_cast_fp16)[name = string("op_4481_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4483_cast_fp16 = mul(x = var_4481_cast_fp16_1, y = const_158_promoted_to_fp16)[name = string("op_4483_cast_fp16")]; int32 var_4485 = const()[name = string("op_4485"), val = int32(-1)]; bool var_4486_interleave_0 = const()[name = string("op_4486_interleave_0"), val = bool(false)]; tensor var_4486_cast_fp16 = concat(axis = var_4485, interleave = var_4486_interleave_0, values = (var_4483_cast_fp16, var_4481_cast_fp16_0))[name = string("op_4486_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = var_4486_cast_fp16, y = sin_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor key_states_63_cast_fp16 = add(x = var_4480_cast_fp16, y = var_4487_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([15])]; tensor expand_dims_152 = const()[name = string("expand_dims_152"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([16])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_151, expand_dims_152, position_id, concat_123_values3_0))[name = string("concat_123")]; tensor concat_124_values1_0 = const()[name = string("concat_124_values1_0"), val = tensor([0])]; tensor concat_124_values3_0 = const()[name = string("concat_124_values3_0"), val = tensor([0])]; int32 concat_124_axis_0 = const()[name = string("concat_124_axis_0"), val = int32(0)]; bool concat_124_interleave_0 = const()[name = string("concat_124_interleave_0"), val = bool(false)]; tensor concat_124 = concat(axis = concat_124_axis_0, interleave = concat_124_interleave_0, values = (expand_dims_154, concat_124_values1_0, var_426, concat_124_values3_0))[name = string("concat_124")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_196_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_196")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63_cast_fp16 = transpose(perm = value_states_63_perm_0, x = var_4466_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = value_states_63_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_197_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_197")]; tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_78)[name = string("op_4530_cast_fp16")]; tensor tile_30 = const()[name = string("tile_30"), val = tensor([1, 1])]; int32 var_4533_axis_0 = const()[name = string("op_4533_axis_0"), val = int32(1)]; tensor var_4533_cast_fp16_0, tensor var_4533_cast_fp16_1 = split(axis = var_4533_axis_0, split_sizes = tile_30, x = var_4530_cast_fp16)[name = string("op_4533_cast_fp16")]; tensor var_4540_begin_0 = const()[name = string("op_4540_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4540_end_0 = const()[name = string("op_4540_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4540_end_mask_0 = const()[name = string("op_4540_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4540_cast_fp16 = slice_by_index(begin = var_4540_begin_0, end = var_4540_end_0, end_mask = var_4540_end_mask_0, x = coreml_update_state_79)[name = string("op_4540_cast_fp16")]; tensor tile_31 = const()[name = string("tile_31"), val = tensor([1, 1])]; int32 var_4543_axis_0 = const()[name = string("op_4543_axis_0"), val = int32(1)]; tensor var_4543_cast_fp16_0, tensor var_4543_cast_fp16_1 = split(axis = var_4543_axis_0, split_sizes = tile_31, x = var_4540_cast_fp16)[name = string("op_4543_cast_fp16")]; tensor var_4546_split_sizes_0 = const()[name = string("op_4546_split_sizes_0"), val = tensor([8, 8])]; int32 var_4546_axis_0 = const()[name = string("op_4546_axis_0"), val = int32(1)]; tensor var_4546_cast_fp16_0, tensor var_4546_cast_fp16_1 = split(axis = var_4546_axis_0, split_sizes = var_4546_split_sizes_0, x = query_states_63_cast_fp16)[name = string("op_4546_cast_fp16")]; bool attn_weights_241_transpose_x_0 = const()[name = string("attn_weights_241_transpose_x_0"), val = bool(false)]; bool attn_weights_241_transpose_y_0 = const()[name = string("attn_weights_241_transpose_y_0"), val = bool(false)]; tensor attn_weights_241_cast_fp16 = matmul(transpose_x = attn_weights_241_transpose_x_0, transpose_y = attn_weights_241_transpose_y_0, x = var_4533_cast_fp16_0, y = var_4546_cast_fp16_0)[name = string("attn_weights_241_cast_fp16")]; fp16 _inversed_attn_weights_243_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_243_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_243_cast_fp16 = mul(x = attn_weights_241_cast_fp16, y = _inversed_attn_weights_243_y_0_to_fp16)[name = string("_inversed_attn_weights_243_cast_fp16")]; tensor attn_weights_245_cast_fp16 = add(x = _inversed_attn_weights_243_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_245_cast_fp16")]; int32 var_4553 = const()[name = string("op_4553"), val = int32(2)]; tensor attn_weights_247_cast_fp16 = softmax(axis = var_4553, x = attn_weights_245_cast_fp16)[name = string("attn_weights_247_cast_fp16")]; bool var_4559_transpose_x_1 = const()[name = string("op_4559_transpose_x_1"), val = bool(true)]; bool var_4559_transpose_y_1 = const()[name = string("op_4559_transpose_y_1"), val = bool(false)]; tensor var_4559_cast_fp16 = matmul(transpose_x = var_4559_transpose_x_1, transpose_y = var_4559_transpose_y_1, x = attn_weights_247_cast_fp16, y = var_4543_cast_fp16_0)[name = string("op_4559_cast_fp16")]; bool attn_weights_249_transpose_x_0 = const()[name = string("attn_weights_249_transpose_x_0"), val = bool(false)]; bool attn_weights_249_transpose_y_0 = const()[name = string("attn_weights_249_transpose_y_0"), val = bool(false)]; tensor attn_weights_249_cast_fp16 = matmul(transpose_x = attn_weights_249_transpose_x_0, transpose_y = attn_weights_249_transpose_y_0, x = var_4533_cast_fp16_1, y = var_4546_cast_fp16_1)[name = string("attn_weights_249_cast_fp16")]; fp16 _inversed_attn_weights_251_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_251_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_251_cast_fp16 = mul(x = attn_weights_249_cast_fp16, y = _inversed_attn_weights_251_y_0_to_fp16)[name = string("_inversed_attn_weights_251_cast_fp16")]; tensor attn_weights_253_cast_fp16 = add(x = _inversed_attn_weights_251_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_253_cast_fp16")]; int32 var_4565 = const()[name = string("op_4565"), val = int32(2)]; tensor attn_weights_255_cast_fp16 = softmax(axis = var_4565, x = attn_weights_253_cast_fp16)[name = string("attn_weights_255_cast_fp16")]; bool attn_output_91_transpose_x_1 = const()[name = string("attn_output_91_transpose_x_1"), val = bool(true)]; bool attn_output_91_transpose_y_1 = const()[name = string("attn_output_91_transpose_y_1"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_1, transpose_y = attn_output_91_transpose_y_1, x = attn_weights_255_cast_fp16, y = var_4543_cast_fp16_1)[name = string("attn_output_91_cast_fp16")]; int32 var_4573 = const()[name = string("op_4573"), val = int32(1)]; bool attn_output_93_interleave_0 = const()[name = string("attn_output_93_interleave_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = concat(axis = var_4573, interleave = attn_output_93_interleave_0, values = (var_4559_cast_fp16, attn_output_91_cast_fp16))[name = string("attn_output_93_cast_fp16")]; tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 1024, 1, 1])]; tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; tensor x_275_cast_fp16 = reshape(shape = var_4582, x = var_4577_cast_fp16)[name = string("x_275_cast_fp16")]; string hidden_states_93_pad_type_0 = const()[name = string("hidden_states_93_pad_type_0"), val = string("valid")]; tensor hidden_states_93_strides_0 = const()[name = string("hidden_states_93_strides_0"), val = tensor([1, 1])]; tensor hidden_states_93_pad_0 = const()[name = string("hidden_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_93_dilations_0 = const()[name = string("hidden_states_93_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_93_groups_0 = const()[name = string("hidden_states_93_groups_0"), val = int32(1)]; tensor var_4589_to_fp16 = const()[name = string("op_4589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459424768)))]; tensor hidden_states_93_cast_fp16 = conv(dilations = hidden_states_93_dilations_0, groups = hidden_states_93_groups_0, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = hidden_states_93_strides_0, weight = var_4589_to_fp16, x = x_275_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_269_cast_fp16, y = hidden_states_93_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_4601 = const()[name = string("op_4601"), val = int32(1)]; fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4604_cast_fp16 = mul(x = x_277_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_4604_cast_fp16")]; bool x_279_interleave_0 = const()[name = string("x_279_interleave_0"), val = bool(false)]; tensor x_279_cast_fp16 = concat(axis = var_4601, interleave = x_279_interleave_0, values = (x_277_cast_fp16, var_4604_cast_fp16))[name = string("x_279_cast_fp16")]; tensor out_187_axes_0 = const()[name = string("out_187_axes_0"), val = tensor([1])]; fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_187_cast_fp16 = layer_norm(axes = out_187_axes_0, epsilon = var_4614_to_fp16, x = x_279_cast_fp16)[name = string("out_187_cast_fp16")]; tensor layer_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461521984)))]; tensor out_189_cast_fp16 = mul(x = out_187_cast_fp16, y = layer_layers_15_post_attention_layernorm_weight_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_4620_split_sizes_0 = const()[name = string("op_4620_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4620_axis_0 = const()[name = string("op_4620_axis_0"), val = int32(1)]; tensor var_4620_cast_fp16_0, tensor var_4620_cast_fp16_1 = split(axis = var_4620_axis_0, split_sizes = var_4620_split_sizes_0, x = out_189_cast_fp16)[name = string("op_4620_cast_fp16")]; string input_31_pad_type_0 = const()[name = string("input_31_pad_type_0"), val = string("valid")]; tensor input_31_strides_0 = const()[name = string("input_31_strides_0"), val = tensor([1, 1])]; tensor input_31_pad_0 = const()[name = string("input_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_31_dilations_0 = const()[name = string("input_31_dilations_0"), val = tensor([1, 1])]; int32 input_31_groups_0 = const()[name = string("input_31_groups_0"), val = int32(1)]; tensor var_4625_to_fp16 = const()[name = string("op_4625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461526144)))]; tensor input_31_cast_fp16 = conv(dilations = input_31_dilations_0, groups = input_31_groups_0, pad = input_31_pad_0, pad_type = input_31_pad_type_0, strides = input_31_strides_0, weight = var_4625_to_fp16, x = var_4620_cast_fp16_0)[name = string("input_31_cast_fp16")]; tensor var_4636_cast_fp16 = silu(x = input_31_cast_fp16)[name = string("op_4636_cast_fp16")]; string var_4641_pad_type_0 = const()[name = string("op_4641_pad_type_0"), val = string("valid")]; tensor var_4641_strides_0 = const()[name = string("op_4641_strides_0"), val = tensor([1, 1])]; tensor var_4641_pad_0 = const()[name = string("op_4641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_dilations_0 = const()[name = string("op_4641_dilations_0"), val = tensor([1, 1])]; int32 var_4641_groups_0 = const()[name = string("op_4641_groups_0"), val = int32(1)]; tensor var_4624_to_fp16 = const()[name = string("op_4624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469914816)))]; tensor var_4641_cast_fp16 = conv(dilations = var_4641_dilations_0, groups = var_4641_groups_0, pad = var_4641_pad_0, pad_type = var_4641_pad_type_0, strides = var_4641_strides_0, weight = var_4624_to_fp16, x = var_4620_cast_fp16_0)[name = string("op_4641_cast_fp16")]; tensor x_285_cast_fp16 = mul(x = var_4636_cast_fp16, y = var_4641_cast_fp16)[name = string("x_285_cast_fp16")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478303488)))]; tensor hidden_states_95_cast_fp16 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = var_4623_to_fp16, x = x_285_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor x_287_cast_fp16 = add(x = x_277_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("x_287_cast_fp16")]; int32 var_4654 = const()[name = string("op_4654"), val = int32(1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4657_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4657_cast_fp16")]; bool x_289_interleave_0 = const()[name = string("x_289_interleave_0"), val = bool(false)]; tensor x_289_cast_fp16 = concat(axis = var_4654, interleave = x_289_interleave_0, values = (x_287_cast_fp16, var_4657_cast_fp16))[name = string("x_289_cast_fp16")]; tensor out_193_axes_0 = const()[name = string("out_193_axes_0"), val = tensor([1])]; fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_193_cast_fp16 = layer_norm(axes = out_193_axes_0, epsilon = var_4667_to_fp16, x = x_289_cast_fp16)[name = string("out_193_cast_fp16")]; tensor layer_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486692160)))]; tensor out_195_cast_fp16 = mul(x = out_193_cast_fp16, y = layer_layers_16_input_layernorm_weight_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_4673_split_sizes_0 = const()[name = string("op_4673_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4673_axis_0 = const()[name = string("op_4673_axis_0"), val = int32(1)]; tensor var_4673_cast_fp16_0, tensor var_4673_cast_fp16_1 = split(axis = var_4673_axis_0, split_sizes = var_4673_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4673_cast_fp16")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486696320)))]; tensor query_states_65_cast_fp16 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = var_4695_to_fp16, x = var_4673_cast_fp16_0)[name = string("query_states_65_cast_fp16")]; string key_states_65_pad_type_0 = const()[name = string("key_states_65_pad_type_0"), val = string("valid")]; tensor key_states_65_strides_0 = const()[name = string("key_states_65_strides_0"), val = tensor([1, 1])]; tensor key_states_65_pad_0 = const()[name = string("key_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_65_dilations_0 = const()[name = string("key_states_65_dilations_0"), val = tensor([1, 1])]; int32 key_states_65_groups_0 = const()[name = string("key_states_65_groups_0"), val = int32(1)]; tensor var_4706_to_fp16 = const()[name = string("op_4706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488793536)))]; tensor key_states_65_cast_fp16 = conv(dilations = key_states_65_dilations_0, groups = key_states_65_groups_0, pad = key_states_65_pad_0, pad_type = key_states_65_pad_type_0, strides = key_states_65_strides_0, weight = var_4706_to_fp16, x = var_4673_cast_fp16_0)[name = string("key_states_65_cast_fp16")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489055744)))]; tensor value_states_65_cast_fp16 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = var_4717_to_fp16, x = var_4673_cast_fp16_0)[name = string("value_states_65_cast_fp16")]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 16, 64, 1])]; tensor embed_65_cast_fp16 = reshape(shape = var_4725, x = query_states_65_cast_fp16)[name = string("embed_65_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 2, 64, 1])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = key_states_65_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor embed_67_perm_0 = const()[name = string("embed_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2, 64, 1])]; tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = value_states_65_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor value_states_67_perm_0 = const()[name = string("value_states_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4741_cast_fp16 = mul(x = embed_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4741_cast_fp16")]; tensor var_4742_split_sizes_0 = const()[name = string("op_4742_split_sizes_0"), val = tensor([32, 32])]; int32 var_4742_axis_0 = const()[name = string("op_4742_axis_0"), val = int32(-2)]; tensor var_4742_cast_fp16_0, tensor var_4742_cast_fp16_1 = split(axis = var_4742_axis_0, split_sizes = var_4742_split_sizes_0, x = embed_65_cast_fp16)[name = string("op_4742_cast_fp16")]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4744_cast_fp16 = mul(x = var_4742_cast_fp16_1, y = const_167_promoted_to_fp16)[name = string("op_4744_cast_fp16")]; int32 var_4746 = const()[name = string("op_4746"), val = int32(-2)]; bool var_4747_interleave_0 = const()[name = string("op_4747_interleave_0"), val = bool(false)]; tensor var_4747_cast_fp16 = concat(axis = var_4746, interleave = var_4747_interleave_0, values = (var_4744_cast_fp16, var_4742_cast_fp16_0))[name = string("op_4747_cast_fp16")]; tensor var_4748_cast_fp16 = mul(x = var_4747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_4741_cast_fp16, y = var_4748_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor embed_67_cast_fp16 = transpose(perm = embed_67_perm_0, x = var_4730_cast_fp16)[name = string("transpose_23")]; tensor var_4751_cast_fp16 = mul(x = embed_67_cast_fp16, y = cos_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4752_split_sizes_0 = const()[name = string("op_4752_split_sizes_0"), val = tensor([32, 32])]; int32 var_4752_axis_0 = const()[name = string("op_4752_axis_0"), val = int32(-1)]; tensor var_4752_cast_fp16_0, tensor var_4752_cast_fp16_1 = split(axis = var_4752_axis_0, split_sizes = var_4752_split_sizes_0, x = embed_67_cast_fp16)[name = string("op_4752_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4754_cast_fp16 = mul(x = var_4752_cast_fp16_1, y = const_168_promoted_to_fp16)[name = string("op_4754_cast_fp16")]; int32 var_4756 = const()[name = string("op_4756"), val = int32(-1)]; bool var_4757_interleave_0 = const()[name = string("op_4757_interleave_0"), val = bool(false)]; tensor var_4757_cast_fp16 = concat(axis = var_4756, interleave = var_4757_interleave_0, values = (var_4754_cast_fp16, var_4752_cast_fp16_0))[name = string("op_4757_cast_fp16")]; tensor var_4758_cast_fp16 = mul(x = var_4757_cast_fp16, y = sin_cast_fp16)[name = string("op_4758_cast_fp16")]; tensor key_states_67_cast_fp16 = add(x = var_4751_cast_fp16, y = var_4758_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor expand_dims_161 = const()[name = string("expand_dims_161"), val = tensor([16])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_164 = const()[name = string("expand_dims_164"), val = tensor([17])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_161, expand_dims_162, position_id, concat_131_values3_0))[name = string("concat_131")]; tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_164, concat_132_values1_0, var_426, concat_132_values3_0))[name = string("concat_132")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = key_states_67_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_198_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_198")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67_cast_fp16 = transpose(perm = value_states_67_perm_0, x = var_4737_cast_fp16)[name = string("transpose_22")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = value_states_67_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_199_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_199")]; tensor var_4801_begin_0 = const()[name = string("op_4801_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4801_end_0 = const()[name = string("op_4801_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4801_end_mask_0 = const()[name = string("op_4801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4801_cast_fp16 = slice_by_index(begin = var_4801_begin_0, end = var_4801_end_0, end_mask = var_4801_end_mask_0, x = coreml_update_state_80)[name = string("op_4801_cast_fp16")]; tensor tile_32 = const()[name = string("tile_32"), val = tensor([1, 1])]; int32 var_4804_axis_0 = const()[name = string("op_4804_axis_0"), val = int32(1)]; tensor var_4804_cast_fp16_0, tensor var_4804_cast_fp16_1 = split(axis = var_4804_axis_0, split_sizes = tile_32, x = var_4801_cast_fp16)[name = string("op_4804_cast_fp16")]; tensor var_4811_begin_0 = const()[name = string("op_4811_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4811_end_0 = const()[name = string("op_4811_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4811_end_mask_0 = const()[name = string("op_4811_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4811_cast_fp16 = slice_by_index(begin = var_4811_begin_0, end = var_4811_end_0, end_mask = var_4811_end_mask_0, x = coreml_update_state_81)[name = string("op_4811_cast_fp16")]; tensor tile_33 = const()[name = string("tile_33"), val = tensor([1, 1])]; int32 var_4814_axis_0 = const()[name = string("op_4814_axis_0"), val = int32(1)]; tensor var_4814_cast_fp16_0, tensor var_4814_cast_fp16_1 = split(axis = var_4814_axis_0, split_sizes = tile_33, x = var_4811_cast_fp16)[name = string("op_4814_cast_fp16")]; tensor var_4817_split_sizes_0 = const()[name = string("op_4817_split_sizes_0"), val = tensor([8, 8])]; int32 var_4817_axis_0 = const()[name = string("op_4817_axis_0"), val = int32(1)]; tensor var_4817_cast_fp16_0, tensor var_4817_cast_fp16_1 = split(axis = var_4817_axis_0, split_sizes = var_4817_split_sizes_0, x = query_states_67_cast_fp16)[name = string("op_4817_cast_fp16")]; bool attn_weights_257_transpose_x_0 = const()[name = string("attn_weights_257_transpose_x_0"), val = bool(false)]; bool attn_weights_257_transpose_y_0 = const()[name = string("attn_weights_257_transpose_y_0"), val = bool(false)]; tensor attn_weights_257_cast_fp16 = matmul(transpose_x = attn_weights_257_transpose_x_0, transpose_y = attn_weights_257_transpose_y_0, x = var_4804_cast_fp16_0, y = var_4817_cast_fp16_0)[name = string("attn_weights_257_cast_fp16")]; fp16 _inversed_attn_weights_259_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_259_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_259_cast_fp16 = mul(x = attn_weights_257_cast_fp16, y = _inversed_attn_weights_259_y_0_to_fp16)[name = string("_inversed_attn_weights_259_cast_fp16")]; tensor attn_weights_261_cast_fp16 = add(x = _inversed_attn_weights_259_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_261_cast_fp16")]; int32 var_4824 = const()[name = string("op_4824"), val = int32(2)]; tensor attn_weights_263_cast_fp16 = softmax(axis = var_4824, x = attn_weights_261_cast_fp16)[name = string("attn_weights_263_cast_fp16")]; bool var_4830_transpose_x_1 = const()[name = string("op_4830_transpose_x_1"), val = bool(true)]; bool var_4830_transpose_y_1 = const()[name = string("op_4830_transpose_y_1"), val = bool(false)]; tensor var_4830_cast_fp16 = matmul(transpose_x = var_4830_transpose_x_1, transpose_y = var_4830_transpose_y_1, x = attn_weights_263_cast_fp16, y = var_4814_cast_fp16_0)[name = string("op_4830_cast_fp16")]; bool attn_weights_265_transpose_x_0 = const()[name = string("attn_weights_265_transpose_x_0"), val = bool(false)]; bool attn_weights_265_transpose_y_0 = const()[name = string("attn_weights_265_transpose_y_0"), val = bool(false)]; tensor attn_weights_265_cast_fp16 = matmul(transpose_x = attn_weights_265_transpose_x_0, transpose_y = attn_weights_265_transpose_y_0, x = var_4804_cast_fp16_1, y = var_4817_cast_fp16_1)[name = string("attn_weights_265_cast_fp16")]; fp16 _inversed_attn_weights_267_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_267_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_267_cast_fp16 = mul(x = attn_weights_265_cast_fp16, y = _inversed_attn_weights_267_y_0_to_fp16)[name = string("_inversed_attn_weights_267_cast_fp16")]; tensor attn_weights_269_cast_fp16 = add(x = _inversed_attn_weights_267_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_269_cast_fp16")]; int32 var_4836 = const()[name = string("op_4836"), val = int32(2)]; tensor attn_weights_271_cast_fp16 = softmax(axis = var_4836, x = attn_weights_269_cast_fp16)[name = string("attn_weights_271_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(true)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_271_cast_fp16, y = var_4814_cast_fp16_1)[name = string("attn_output_97_cast_fp16")]; int32 var_4844 = const()[name = string("op_4844"), val = int32(1)]; bool attn_output_99_interleave_0 = const()[name = string("attn_output_99_interleave_0"), val = bool(false)]; tensor attn_output_99_cast_fp16 = concat(axis = var_4844, interleave = attn_output_99_interleave_0, values = (var_4830_cast_fp16, attn_output_97_cast_fp16))[name = string("attn_output_99_cast_fp16")]; tensor var_4848_perm_0 = const()[name = string("op_4848_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 1024, 1, 1])]; tensor var_4848_cast_fp16 = transpose(perm = var_4848_perm_0, x = attn_output_99_cast_fp16)[name = string("transpose_21")]; tensor x_293_cast_fp16 = reshape(shape = var_4853, x = var_4848_cast_fp16)[name = string("x_293_cast_fp16")]; string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489317952)))]; tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = var_4860_to_fp16, x = x_293_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor x_295_cast_fp16 = add(x = x_287_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("x_295_cast_fp16")]; int32 var_4872 = const()[name = string("op_4872"), val = int32(1)]; fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4875_cast_fp16 = mul(x = x_295_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; bool x_297_interleave_0 = const()[name = string("x_297_interleave_0"), val = bool(false)]; tensor x_297_cast_fp16 = concat(axis = var_4872, interleave = x_297_interleave_0, values = (x_295_cast_fp16, var_4875_cast_fp16))[name = string("x_297_cast_fp16")]; tensor out_199_axes_0 = const()[name = string("out_199_axes_0"), val = tensor([1])]; fp16 var_4885_to_fp16 = const()[name = string("op_4885_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_199_cast_fp16 = layer_norm(axes = out_199_axes_0, epsilon = var_4885_to_fp16, x = x_297_cast_fp16)[name = string("out_199_cast_fp16")]; tensor layer_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491415168)))]; tensor out_201_cast_fp16 = mul(x = out_199_cast_fp16, y = layer_layers_16_post_attention_layernorm_weight_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4891_split_sizes_0 = const()[name = string("op_4891_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(1)]; tensor var_4891_cast_fp16_0, tensor var_4891_cast_fp16_1 = split(axis = var_4891_axis_0, split_sizes = var_4891_split_sizes_0, x = out_201_cast_fp16)[name = string("op_4891_cast_fp16")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491419328)))]; tensor input_33_cast_fp16 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = var_4896_to_fp16, x = var_4891_cast_fp16_0)[name = string("input_33_cast_fp16")]; tensor var_4907_cast_fp16 = silu(x = input_33_cast_fp16)[name = string("op_4907_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4895_to_fp16 = const()[name = string("op_4895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499808000)))]; tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = var_4895_to_fp16, x = var_4891_cast_fp16_0)[name = string("op_4912_cast_fp16")]; tensor x_303_cast_fp16 = mul(x = var_4907_cast_fp16, y = var_4912_cast_fp16)[name = string("x_303_cast_fp16")]; string hidden_states_101_pad_type_0 = const()[name = string("hidden_states_101_pad_type_0"), val = string("valid")]; tensor hidden_states_101_strides_0 = const()[name = string("hidden_states_101_strides_0"), val = tensor([1, 1])]; tensor hidden_states_101_pad_0 = const()[name = string("hidden_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_101_dilations_0 = const()[name = string("hidden_states_101_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_101_groups_0 = const()[name = string("hidden_states_101_groups_0"), val = int32(1)]; tensor var_4894_to_fp16 = const()[name = string("op_4894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508196672)))]; tensor hidden_states_101_cast_fp16 = conv(dilations = hidden_states_101_dilations_0, groups = hidden_states_101_groups_0, pad = hidden_states_101_pad_0, pad_type = hidden_states_101_pad_type_0, strides = hidden_states_101_strides_0, weight = var_4894_to_fp16, x = x_303_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_295_cast_fp16, y = hidden_states_101_cast_fp16)[name = string("x_305_cast_fp16")]; int32 var_4925 = const()[name = string("op_4925"), val = int32(1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4928_cast_fp16 = mul(x = x_305_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_4928_cast_fp16")]; bool x_307_interleave_0 = const()[name = string("x_307_interleave_0"), val = bool(false)]; tensor x_307_cast_fp16 = concat(axis = var_4925, interleave = x_307_interleave_0, values = (x_305_cast_fp16, var_4928_cast_fp16))[name = string("x_307_cast_fp16")]; tensor out_205_axes_0 = const()[name = string("out_205_axes_0"), val = tensor([1])]; fp16 var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_205_cast_fp16 = layer_norm(axes = out_205_axes_0, epsilon = var_4938_to_fp16, x = x_307_cast_fp16)[name = string("out_205_cast_fp16")]; tensor layer_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516585344)))]; tensor out_207_cast_fp16 = mul(x = out_205_cast_fp16, y = layer_layers_17_input_layernorm_weight_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_4944_split_sizes_0 = const()[name = string("op_4944_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4944_axis_0 = const()[name = string("op_4944_axis_0"), val = int32(1)]; tensor var_4944_cast_fp16_0, tensor var_4944_cast_fp16_1 = split(axis = var_4944_axis_0, split_sizes = var_4944_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4944_cast_fp16")]; string query_states_69_pad_type_0 = const()[name = string("query_states_69_pad_type_0"), val = string("valid")]; tensor query_states_69_strides_0 = const()[name = string("query_states_69_strides_0"), val = tensor([1, 1])]; tensor query_states_69_pad_0 = const()[name = string("query_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_69_dilations_0 = const()[name = string("query_states_69_dilations_0"), val = tensor([1, 1])]; int32 query_states_69_groups_0 = const()[name = string("query_states_69_groups_0"), val = int32(1)]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516589504)))]; tensor query_states_69_cast_fp16 = conv(dilations = query_states_69_dilations_0, groups = query_states_69_groups_0, pad = query_states_69_pad_0, pad_type = query_states_69_pad_type_0, strides = query_states_69_strides_0, weight = var_4966_to_fp16, x = var_4944_cast_fp16_0)[name = string("query_states_69_cast_fp16")]; string key_states_69_pad_type_0 = const()[name = string("key_states_69_pad_type_0"), val = string("valid")]; tensor key_states_69_strides_0 = const()[name = string("key_states_69_strides_0"), val = tensor([1, 1])]; tensor key_states_69_pad_0 = const()[name = string("key_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_69_dilations_0 = const()[name = string("key_states_69_dilations_0"), val = tensor([1, 1])]; int32 key_states_69_groups_0 = const()[name = string("key_states_69_groups_0"), val = int32(1)]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518686720)))]; tensor key_states_69_cast_fp16 = conv(dilations = key_states_69_dilations_0, groups = key_states_69_groups_0, pad = key_states_69_pad_0, pad_type = key_states_69_pad_type_0, strides = key_states_69_strides_0, weight = var_4977_to_fp16, x = var_4944_cast_fp16_0)[name = string("key_states_69_cast_fp16")]; string value_states_69_pad_type_0 = const()[name = string("value_states_69_pad_type_0"), val = string("valid")]; tensor value_states_69_strides_0 = const()[name = string("value_states_69_strides_0"), val = tensor([1, 1])]; tensor value_states_69_pad_0 = const()[name = string("value_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_69_dilations_0 = const()[name = string("value_states_69_dilations_0"), val = tensor([1, 1])]; int32 value_states_69_groups_0 = const()[name = string("value_states_69_groups_0"), val = int32(1)]; tensor var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518948928)))]; tensor value_states_69_cast_fp16 = conv(dilations = value_states_69_dilations_0, groups = value_states_69_groups_0, pad = value_states_69_pad_0, pad_type = value_states_69_pad_type_0, strides = value_states_69_strides_0, weight = var_4988_to_fp16, x = var_4944_cast_fp16_0)[name = string("value_states_69_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 16, 64, 1])]; tensor embed_69_cast_fp16 = reshape(shape = var_4996, x = query_states_69_cast_fp16)[name = string("embed_69_cast_fp16")]; tensor var_5000 = const()[name = string("op_5000"), val = tensor([1, 2, 64, 1])]; tensor var_5001_cast_fp16 = reshape(shape = var_5000, x = key_states_69_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor embed_71_perm_0 = const()[name = string("embed_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 2, 64, 1])]; tensor var_5008_cast_fp16 = reshape(shape = var_5007, x = value_states_69_cast_fp16)[name = string("op_5008_cast_fp16")]; tensor value_states_71_perm_0 = const()[name = string("value_states_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5012_cast_fp16 = mul(x = embed_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor var_5013_split_sizes_0 = const()[name = string("op_5013_split_sizes_0"), val = tensor([32, 32])]; int32 var_5013_axis_0 = const()[name = string("op_5013_axis_0"), val = int32(-2)]; tensor var_5013_cast_fp16_0, tensor var_5013_cast_fp16_1 = split(axis = var_5013_axis_0, split_sizes = var_5013_split_sizes_0, x = embed_69_cast_fp16)[name = string("op_5013_cast_fp16")]; fp16 const_177_promoted_to_fp16 = const()[name = string("const_177_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5015_cast_fp16 = mul(x = var_5013_cast_fp16_1, y = const_177_promoted_to_fp16)[name = string("op_5015_cast_fp16")]; int32 var_5017 = const()[name = string("op_5017"), val = int32(-2)]; bool var_5018_interleave_0 = const()[name = string("op_5018_interleave_0"), val = bool(false)]; tensor var_5018_cast_fp16 = concat(axis = var_5017, interleave = var_5018_interleave_0, values = (var_5015_cast_fp16, var_5013_cast_fp16_0))[name = string("op_5018_cast_fp16")]; tensor var_5019_cast_fp16 = mul(x = var_5018_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_5012_cast_fp16, y = var_5019_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor embed_71_cast_fp16 = transpose(perm = embed_71_perm_0, x = var_5001_cast_fp16)[name = string("transpose_20")]; tensor var_5022_cast_fp16 = mul(x = embed_71_cast_fp16, y = cos_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([32, 32])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_cast_fp16_0, tensor var_5023_cast_fp16_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = embed_71_cast_fp16)[name = string("op_5023_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5025_cast_fp16 = mul(x = var_5023_cast_fp16_1, y = const_178_promoted_to_fp16)[name = string("op_5025_cast_fp16")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028_cast_fp16 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025_cast_fp16, var_5023_cast_fp16_0))[name = string("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = mul(x = var_5028_cast_fp16, y = sin_cast_fp16)[name = string("op_5029_cast_fp16")]; tensor key_states_71_cast_fp16 = add(x = var_5022_cast_fp16, y = var_5029_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([17])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([0])]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([18])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_171, expand_dims_172, position_id, concat_139_values3_0))[name = string("concat_139")]; tensor concat_140_values1_0 = const()[name = string("concat_140_values1_0"), val = tensor([0])]; tensor concat_140_values3_0 = const()[name = string("concat_140_values3_0"), val = tensor([0])]; int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (expand_dims_174, concat_140_values1_0, var_426, concat_140_values3_0))[name = string("concat_140")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = key_states_71_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_200_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_200")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_71_cast_fp16 = transpose(perm = value_states_71_perm_0, x = var_5008_cast_fp16)[name = string("transpose_19")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = value_states_71_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_201_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_201")]; tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_82)[name = string("op_5072_cast_fp16")]; tensor tile_34 = const()[name = string("tile_34"), val = tensor([1, 1])]; int32 var_5075_axis_0 = const()[name = string("op_5075_axis_0"), val = int32(1)]; tensor var_5075_cast_fp16_0, tensor var_5075_cast_fp16_1 = split(axis = var_5075_axis_0, split_sizes = tile_34, x = var_5072_cast_fp16)[name = string("op_5075_cast_fp16")]; tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = coreml_update_state_83)[name = string("op_5082_cast_fp16")]; tensor tile_35 = const()[name = string("tile_35"), val = tensor([1, 1])]; int32 var_5085_axis_0 = const()[name = string("op_5085_axis_0"), val = int32(1)]; tensor var_5085_cast_fp16_0, tensor var_5085_cast_fp16_1 = split(axis = var_5085_axis_0, split_sizes = tile_35, x = var_5082_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor var_5088_split_sizes_0 = const()[name = string("op_5088_split_sizes_0"), val = tensor([8, 8])]; int32 var_5088_axis_0 = const()[name = string("op_5088_axis_0"), val = int32(1)]; tensor var_5088_cast_fp16_0, tensor var_5088_cast_fp16_1 = split(axis = var_5088_axis_0, split_sizes = var_5088_split_sizes_0, x = query_states_71_cast_fp16)[name = string("op_5088_cast_fp16")]; bool attn_weights_273_transpose_x_0 = const()[name = string("attn_weights_273_transpose_x_0"), val = bool(false)]; bool attn_weights_273_transpose_y_0 = const()[name = string("attn_weights_273_transpose_y_0"), val = bool(false)]; tensor attn_weights_273_cast_fp16 = matmul(transpose_x = attn_weights_273_transpose_x_0, transpose_y = attn_weights_273_transpose_y_0, x = var_5075_cast_fp16_0, y = var_5088_cast_fp16_0)[name = string("attn_weights_273_cast_fp16")]; fp16 _inversed_attn_weights_275_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_275_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_275_cast_fp16 = mul(x = attn_weights_273_cast_fp16, y = _inversed_attn_weights_275_y_0_to_fp16)[name = string("_inversed_attn_weights_275_cast_fp16")]; tensor attn_weights_277_cast_fp16 = add(x = _inversed_attn_weights_275_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_277_cast_fp16")]; int32 var_5095 = const()[name = string("op_5095"), val = int32(2)]; tensor attn_weights_279_cast_fp16 = softmax(axis = var_5095, x = attn_weights_277_cast_fp16)[name = string("attn_weights_279_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(true)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(false)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = attn_weights_279_cast_fp16, y = var_5085_cast_fp16_0)[name = string("op_5101_cast_fp16")]; bool attn_weights_281_transpose_x_0 = const()[name = string("attn_weights_281_transpose_x_0"), val = bool(false)]; bool attn_weights_281_transpose_y_0 = const()[name = string("attn_weights_281_transpose_y_0"), val = bool(false)]; tensor attn_weights_281_cast_fp16 = matmul(transpose_x = attn_weights_281_transpose_x_0, transpose_y = attn_weights_281_transpose_y_0, x = var_5075_cast_fp16_1, y = var_5088_cast_fp16_1)[name = string("attn_weights_281_cast_fp16")]; fp16 _inversed_attn_weights_283_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_283_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_283_cast_fp16 = mul(x = attn_weights_281_cast_fp16, y = _inversed_attn_weights_283_y_0_to_fp16)[name = string("_inversed_attn_weights_283_cast_fp16")]; tensor attn_weights_285_cast_fp16 = add(x = _inversed_attn_weights_283_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_285_cast_fp16")]; int32 var_5107 = const()[name = string("op_5107"), val = int32(2)]; tensor attn_weights_287_cast_fp16 = softmax(axis = var_5107, x = attn_weights_285_cast_fp16)[name = string("attn_weights_287_cast_fp16")]; bool attn_output_103_transpose_x_1 = const()[name = string("attn_output_103_transpose_x_1"), val = bool(true)]; bool attn_output_103_transpose_y_1 = const()[name = string("attn_output_103_transpose_y_1"), val = bool(false)]; tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_1, transpose_y = attn_output_103_transpose_y_1, x = attn_weights_287_cast_fp16, y = var_5085_cast_fp16_1)[name = string("attn_output_103_cast_fp16")]; int32 var_5115 = const()[name = string("op_5115"), val = int32(1)]; bool attn_output_105_interleave_0 = const()[name = string("attn_output_105_interleave_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = concat(axis = var_5115, interleave = attn_output_105_interleave_0, values = (var_5101_cast_fp16, attn_output_103_cast_fp16))[name = string("attn_output_105_cast_fp16")]; tensor var_5119_perm_0 = const()[name = string("op_5119_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 1024, 1, 1])]; tensor var_5119_cast_fp16 = transpose(perm = var_5119_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_18")]; tensor x_311_cast_fp16 = reshape(shape = var_5124, x = var_5119_cast_fp16)[name = string("x_311_cast_fp16")]; string hidden_states_105_pad_type_0 = const()[name = string("hidden_states_105_pad_type_0"), val = string("valid")]; tensor hidden_states_105_strides_0 = const()[name = string("hidden_states_105_strides_0"), val = tensor([1, 1])]; tensor hidden_states_105_pad_0 = const()[name = string("hidden_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_105_dilations_0 = const()[name = string("hidden_states_105_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_105_groups_0 = const()[name = string("hidden_states_105_groups_0"), val = int32(1)]; tensor var_5131_to_fp16 = const()[name = string("op_5131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519211136)))]; tensor hidden_states_105_cast_fp16 = conv(dilations = hidden_states_105_dilations_0, groups = hidden_states_105_groups_0, pad = hidden_states_105_pad_0, pad_type = hidden_states_105_pad_type_0, strides = hidden_states_105_strides_0, weight = var_5131_to_fp16, x = x_311_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor x_313_cast_fp16 = add(x = x_305_cast_fp16, y = hidden_states_105_cast_fp16)[name = string("x_313_cast_fp16")]; int32 var_5143 = const()[name = string("op_5143"), val = int32(1)]; fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5146_cast_fp16 = mul(x = x_313_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_5146_cast_fp16")]; bool x_315_interleave_0 = const()[name = string("x_315_interleave_0"), val = bool(false)]; tensor x_315_cast_fp16 = concat(axis = var_5143, interleave = x_315_interleave_0, values = (x_313_cast_fp16, var_5146_cast_fp16))[name = string("x_315_cast_fp16")]; tensor out_211_axes_0 = const()[name = string("out_211_axes_0"), val = tensor([1])]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_211_cast_fp16 = layer_norm(axes = out_211_axes_0, epsilon = var_5156_to_fp16, x = x_315_cast_fp16)[name = string("out_211_cast_fp16")]; tensor layer_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521308352)))]; tensor out_213_cast_fp16 = mul(x = out_211_cast_fp16, y = layer_layers_17_post_attention_layernorm_weight_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_5162_split_sizes_0 = const()[name = string("op_5162_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5162_axis_0 = const()[name = string("op_5162_axis_0"), val = int32(1)]; tensor var_5162_cast_fp16_0, tensor var_5162_cast_fp16_1 = split(axis = var_5162_axis_0, split_sizes = var_5162_split_sizes_0, x = out_213_cast_fp16)[name = string("op_5162_cast_fp16")]; string input_35_pad_type_0 = const()[name = string("input_35_pad_type_0"), val = string("valid")]; tensor input_35_strides_0 = const()[name = string("input_35_strides_0"), val = tensor([1, 1])]; tensor input_35_pad_0 = const()[name = string("input_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_35_dilations_0 = const()[name = string("input_35_dilations_0"), val = tensor([1, 1])]; int32 input_35_groups_0 = const()[name = string("input_35_groups_0"), val = int32(1)]; tensor var_5167_to_fp16 = const()[name = string("op_5167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521312512)))]; tensor input_35_cast_fp16 = conv(dilations = input_35_dilations_0, groups = input_35_groups_0, pad = input_35_pad_0, pad_type = input_35_pad_type_0, strides = input_35_strides_0, weight = var_5167_to_fp16, x = var_5162_cast_fp16_0)[name = string("input_35_cast_fp16")]; tensor var_5178_cast_fp16 = silu(x = input_35_cast_fp16)[name = string("op_5178_cast_fp16")]; string var_5183_pad_type_0 = const()[name = string("op_5183_pad_type_0"), val = string("valid")]; tensor var_5183_strides_0 = const()[name = string("op_5183_strides_0"), val = tensor([1, 1])]; tensor var_5183_pad_0 = const()[name = string("op_5183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5183_dilations_0 = const()[name = string("op_5183_dilations_0"), val = tensor([1, 1])]; int32 var_5183_groups_0 = const()[name = string("op_5183_groups_0"), val = int32(1)]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529701184)))]; tensor var_5183_cast_fp16 = conv(dilations = var_5183_dilations_0, groups = var_5183_groups_0, pad = var_5183_pad_0, pad_type = var_5183_pad_type_0, strides = var_5183_strides_0, weight = var_5166_to_fp16, x = var_5162_cast_fp16_0)[name = string("op_5183_cast_fp16")]; tensor x_321_cast_fp16 = mul(x = var_5178_cast_fp16, y = var_5183_cast_fp16)[name = string("x_321_cast_fp16")]; string hidden_states_107_pad_type_0 = const()[name = string("hidden_states_107_pad_type_0"), val = string("valid")]; tensor hidden_states_107_strides_0 = const()[name = string("hidden_states_107_strides_0"), val = tensor([1, 1])]; tensor hidden_states_107_pad_0 = const()[name = string("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_107_dilations_0 = const()[name = string("hidden_states_107_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_107_groups_0 = const()[name = string("hidden_states_107_groups_0"), val = int32(1)]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538089856)))]; tensor hidden_states_107_cast_fp16 = conv(dilations = hidden_states_107_dilations_0, groups = hidden_states_107_groups_0, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = hidden_states_107_strides_0, weight = var_5165_to_fp16, x = x_321_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor x_323_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_107_cast_fp16)[name = string("x_323_cast_fp16")]; int32 var_5196 = const()[name = string("op_5196"), val = int32(1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5199_cast_fp16 = mul(x = x_323_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_5199_cast_fp16")]; bool x_325_interleave_0 = const()[name = string("x_325_interleave_0"), val = bool(false)]; tensor x_325_cast_fp16 = concat(axis = var_5196, interleave = x_325_interleave_0, values = (x_323_cast_fp16, var_5199_cast_fp16))[name = string("x_325_cast_fp16")]; tensor out_217_axes_0 = const()[name = string("out_217_axes_0"), val = tensor([1])]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_217_cast_fp16 = layer_norm(axes = out_217_axes_0, epsilon = var_5209_to_fp16, x = x_325_cast_fp16)[name = string("out_217_cast_fp16")]; tensor layer_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546478528)))]; tensor out_219_cast_fp16 = mul(x = out_217_cast_fp16, y = layer_layers_18_input_layernorm_weight_to_fp16)[name = string("out_219_cast_fp16")]; tensor var_5215_split_sizes_0 = const()[name = string("op_5215_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5215_axis_0 = const()[name = string("op_5215_axis_0"), val = int32(1)]; tensor var_5215_cast_fp16_0, tensor var_5215_cast_fp16_1 = split(axis = var_5215_axis_0, split_sizes = var_5215_split_sizes_0, x = out_219_cast_fp16)[name = string("op_5215_cast_fp16")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546482688)))]; tensor query_states_73_cast_fp16 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = var_5237_to_fp16, x = var_5215_cast_fp16_0)[name = string("query_states_73_cast_fp16")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548579904)))]; tensor key_states_73_cast_fp16 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = var_5248_to_fp16, x = var_5215_cast_fp16_0)[name = string("key_states_73_cast_fp16")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor var_5259_to_fp16 = const()[name = string("op_5259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548842112)))]; tensor value_states_73_cast_fp16 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = var_5259_to_fp16, x = var_5215_cast_fp16_0)[name = string("value_states_73_cast_fp16")]; tensor var_5267 = const()[name = string("op_5267"), val = tensor([1, 16, 64, 1])]; tensor embed_73_cast_fp16 = reshape(shape = var_5267, x = query_states_73_cast_fp16)[name = string("embed_73_cast_fp16")]; tensor var_5271 = const()[name = string("op_5271"), val = tensor([1, 2, 64, 1])]; tensor var_5272_cast_fp16 = reshape(shape = var_5271, x = key_states_73_cast_fp16)[name = string("op_5272_cast_fp16")]; tensor embed_75_perm_0 = const()[name = string("embed_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 64, 1])]; tensor var_5279_cast_fp16 = reshape(shape = var_5278, x = value_states_73_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor value_states_75_perm_0 = const()[name = string("value_states_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5283_cast_fp16 = mul(x = embed_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor var_5284_split_sizes_0 = const()[name = string("op_5284_split_sizes_0"), val = tensor([32, 32])]; int32 var_5284_axis_0 = const()[name = string("op_5284_axis_0"), val = int32(-2)]; tensor var_5284_cast_fp16_0, tensor var_5284_cast_fp16_1 = split(axis = var_5284_axis_0, split_sizes = var_5284_split_sizes_0, x = embed_73_cast_fp16)[name = string("op_5284_cast_fp16")]; fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5286_cast_fp16 = mul(x = var_5284_cast_fp16_1, y = const_187_promoted_to_fp16)[name = string("op_5286_cast_fp16")]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-2)]; bool var_5289_interleave_0 = const()[name = string("op_5289_interleave_0"), val = bool(false)]; tensor var_5289_cast_fp16 = concat(axis = var_5288, interleave = var_5289_interleave_0, values = (var_5286_cast_fp16, var_5284_cast_fp16_0))[name = string("op_5289_cast_fp16")]; tensor var_5290_cast_fp16 = mul(x = var_5289_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5290_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_5283_cast_fp16, y = var_5290_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor embed_75_cast_fp16 = transpose(perm = embed_75_perm_0, x = var_5272_cast_fp16)[name = string("transpose_17")]; tensor var_5293_cast_fp16 = mul(x = embed_75_cast_fp16, y = cos_cast_fp16)[name = string("op_5293_cast_fp16")]; tensor var_5294_split_sizes_0 = const()[name = string("op_5294_split_sizes_0"), val = tensor([32, 32])]; int32 var_5294_axis_0 = const()[name = string("op_5294_axis_0"), val = int32(-1)]; tensor var_5294_cast_fp16_0, tensor var_5294_cast_fp16_1 = split(axis = var_5294_axis_0, split_sizes = var_5294_split_sizes_0, x = embed_75_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5296_cast_fp16 = mul(x = var_5294_cast_fp16_1, y = const_188_promoted_to_fp16)[name = string("op_5296_cast_fp16")]; int32 var_5298 = const()[name = string("op_5298"), val = int32(-1)]; bool var_5299_interleave_0 = const()[name = string("op_5299_interleave_0"), val = bool(false)]; tensor var_5299_cast_fp16 = concat(axis = var_5298, interleave = var_5299_interleave_0, values = (var_5296_cast_fp16, var_5294_cast_fp16_0))[name = string("op_5299_cast_fp16")]; tensor var_5300_cast_fp16 = mul(x = var_5299_cast_fp16, y = sin_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor key_states_75_cast_fp16 = add(x = var_5293_cast_fp16, y = var_5300_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([18])]; tensor expand_dims_182 = const()[name = string("expand_dims_182"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([19])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_181, expand_dims_182, position_id, concat_147_values3_0))[name = string("concat_147")]; tensor concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = tensor([0])]; tensor concat_148_values3_0 = const()[name = string("concat_148_values3_0"), val = tensor([0])]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (expand_dims_184, concat_148_values1_0, var_426, concat_148_values3_0))[name = string("concat_148")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = key_states_75_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_202_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_202")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75_cast_fp16 = transpose(perm = value_states_75_perm_0, x = var_5279_cast_fp16)[name = string("transpose_16")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = value_states_75_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_203_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_203")]; tensor var_5343_begin_0 = const()[name = string("op_5343_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5343_end_0 = const()[name = string("op_5343_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5343_end_mask_0 = const()[name = string("op_5343_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5343_cast_fp16 = slice_by_index(begin = var_5343_begin_0, end = var_5343_end_0, end_mask = var_5343_end_mask_0, x = coreml_update_state_84)[name = string("op_5343_cast_fp16")]; tensor tile_36 = const()[name = string("tile_36"), val = tensor([1, 1])]; int32 var_5346_axis_0 = const()[name = string("op_5346_axis_0"), val = int32(1)]; tensor var_5346_cast_fp16_0, tensor var_5346_cast_fp16_1 = split(axis = var_5346_axis_0, split_sizes = tile_36, x = var_5343_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = coreml_update_state_85)[name = string("op_5353_cast_fp16")]; tensor tile_37 = const()[name = string("tile_37"), val = tensor([1, 1])]; int32 var_5356_axis_0 = const()[name = string("op_5356_axis_0"), val = int32(1)]; tensor var_5356_cast_fp16_0, tensor var_5356_cast_fp16_1 = split(axis = var_5356_axis_0, split_sizes = tile_37, x = var_5353_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_split_sizes_0 = const()[name = string("op_5359_split_sizes_0"), val = tensor([8, 8])]; int32 var_5359_axis_0 = const()[name = string("op_5359_axis_0"), val = int32(1)]; tensor var_5359_cast_fp16_0, tensor var_5359_cast_fp16_1 = split(axis = var_5359_axis_0, split_sizes = var_5359_split_sizes_0, x = query_states_75_cast_fp16)[name = string("op_5359_cast_fp16")]; bool attn_weights_289_transpose_x_0 = const()[name = string("attn_weights_289_transpose_x_0"), val = bool(false)]; bool attn_weights_289_transpose_y_0 = const()[name = string("attn_weights_289_transpose_y_0"), val = bool(false)]; tensor attn_weights_289_cast_fp16 = matmul(transpose_x = attn_weights_289_transpose_x_0, transpose_y = attn_weights_289_transpose_y_0, x = var_5346_cast_fp16_0, y = var_5359_cast_fp16_0)[name = string("attn_weights_289_cast_fp16")]; fp16 _inversed_attn_weights_291_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_291_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_291_cast_fp16 = mul(x = attn_weights_289_cast_fp16, y = _inversed_attn_weights_291_y_0_to_fp16)[name = string("_inversed_attn_weights_291_cast_fp16")]; tensor attn_weights_293_cast_fp16 = add(x = _inversed_attn_weights_291_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_293_cast_fp16")]; int32 var_5366 = const()[name = string("op_5366"), val = int32(2)]; tensor attn_weights_295_cast_fp16 = softmax(axis = var_5366, x = attn_weights_293_cast_fp16)[name = string("attn_weights_295_cast_fp16")]; bool var_5372_transpose_x_1 = const()[name = string("op_5372_transpose_x_1"), val = bool(true)]; bool var_5372_transpose_y_1 = const()[name = string("op_5372_transpose_y_1"), val = bool(false)]; tensor var_5372_cast_fp16 = matmul(transpose_x = var_5372_transpose_x_1, transpose_y = var_5372_transpose_y_1, x = attn_weights_295_cast_fp16, y = var_5356_cast_fp16_0)[name = string("op_5372_cast_fp16")]; bool attn_weights_297_transpose_x_0 = const()[name = string("attn_weights_297_transpose_x_0"), val = bool(false)]; bool attn_weights_297_transpose_y_0 = const()[name = string("attn_weights_297_transpose_y_0"), val = bool(false)]; tensor attn_weights_297_cast_fp16 = matmul(transpose_x = attn_weights_297_transpose_x_0, transpose_y = attn_weights_297_transpose_y_0, x = var_5346_cast_fp16_1, y = var_5359_cast_fp16_1)[name = string("attn_weights_297_cast_fp16")]; fp16 _inversed_attn_weights_299_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_299_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_299_cast_fp16 = mul(x = attn_weights_297_cast_fp16, y = _inversed_attn_weights_299_y_0_to_fp16)[name = string("_inversed_attn_weights_299_cast_fp16")]; tensor attn_weights_301_cast_fp16 = add(x = _inversed_attn_weights_299_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_301_cast_fp16")]; int32 var_5378 = const()[name = string("op_5378"), val = int32(2)]; tensor attn_weights_303_cast_fp16 = softmax(axis = var_5378, x = attn_weights_301_cast_fp16)[name = string("attn_weights_303_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(true)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_303_cast_fp16, y = var_5356_cast_fp16_1)[name = string("attn_output_109_cast_fp16")]; int32 var_5386 = const()[name = string("op_5386"), val = int32(1)]; bool attn_output_111_interleave_0 = const()[name = string("attn_output_111_interleave_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = concat(axis = var_5386, interleave = attn_output_111_interleave_0, values = (var_5372_cast_fp16, attn_output_109_cast_fp16))[name = string("attn_output_111_cast_fp16")]; tensor var_5390_perm_0 = const()[name = string("op_5390_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5395 = const()[name = string("op_5395"), val = tensor([1, 1024, 1, 1])]; tensor var_5390_cast_fp16 = transpose(perm = var_5390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_15")]; tensor x_329_cast_fp16 = reshape(shape = var_5395, x = var_5390_cast_fp16)[name = string("x_329_cast_fp16")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor var_5402_to_fp16 = const()[name = string("op_5402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549104320)))]; tensor hidden_states_111_cast_fp16 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = var_5402_to_fp16, x = x_329_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_323_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("x_331_cast_fp16")]; int32 var_5414 = const()[name = string("op_5414"), val = int32(1)]; fp16 const_193_promoted_to_fp16 = const()[name = string("const_193_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5417_cast_fp16 = mul(x = x_331_cast_fp16, y = const_193_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool x_333_interleave_0 = const()[name = string("x_333_interleave_0"), val = bool(false)]; tensor x_333_cast_fp16 = concat(axis = var_5414, interleave = x_333_interleave_0, values = (x_331_cast_fp16, var_5417_cast_fp16))[name = string("x_333_cast_fp16")]; tensor out_223_axes_0 = const()[name = string("out_223_axes_0"), val = tensor([1])]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_223_cast_fp16 = layer_norm(axes = out_223_axes_0, epsilon = var_5427_to_fp16, x = x_333_cast_fp16)[name = string("out_223_cast_fp16")]; tensor layer_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551201536)))]; tensor out_225_cast_fp16 = mul(x = out_223_cast_fp16, y = layer_layers_18_post_attention_layernorm_weight_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(1)]; tensor var_5433_cast_fp16_0, tensor var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = out_225_cast_fp16)[name = string("op_5433_cast_fp16")]; string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551205696)))]; tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = var_5438_to_fp16, x = var_5433_cast_fp16_0)[name = string("input_37_cast_fp16")]; tensor var_5449_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_5449_cast_fp16")]; string var_5454_pad_type_0 = const()[name = string("op_5454_pad_type_0"), val = string("valid")]; tensor var_5454_strides_0 = const()[name = string("op_5454_strides_0"), val = tensor([1, 1])]; tensor var_5454_pad_0 = const()[name = string("op_5454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_dilations_0 = const()[name = string("op_5454_dilations_0"), val = tensor([1, 1])]; int32 var_5454_groups_0 = const()[name = string("op_5454_groups_0"), val = int32(1)]; tensor var_5437_to_fp16 = const()[name = string("op_5437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559594368)))]; tensor var_5454_cast_fp16 = conv(dilations = var_5454_dilations_0, groups = var_5454_groups_0, pad = var_5454_pad_0, pad_type = var_5454_pad_type_0, strides = var_5454_strides_0, weight = var_5437_to_fp16, x = var_5433_cast_fp16_0)[name = string("op_5454_cast_fp16")]; tensor x_339_cast_fp16 = mul(x = var_5449_cast_fp16, y = var_5454_cast_fp16)[name = string("x_339_cast_fp16")]; string hidden_states_113_pad_type_0 = const()[name = string("hidden_states_113_pad_type_0"), val = string("valid")]; tensor hidden_states_113_strides_0 = const()[name = string("hidden_states_113_strides_0"), val = tensor([1, 1])]; tensor hidden_states_113_pad_0 = const()[name = string("hidden_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_113_dilations_0 = const()[name = string("hidden_states_113_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_113_groups_0 = const()[name = string("hidden_states_113_groups_0"), val = int32(1)]; tensor var_5436_to_fp16 = const()[name = string("op_5436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567983040)))]; tensor hidden_states_113_cast_fp16 = conv(dilations = hidden_states_113_dilations_0, groups = hidden_states_113_groups_0, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = hidden_states_113_strides_0, weight = var_5436_to_fp16, x = x_339_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor x_341_cast_fp16 = add(x = x_331_cast_fp16, y = hidden_states_113_cast_fp16)[name = string("x_341_cast_fp16")]; int32 var_5467 = const()[name = string("op_5467"), val = int32(1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5470_cast_fp16 = mul(x = x_341_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_5470_cast_fp16")]; bool x_343_interleave_0 = const()[name = string("x_343_interleave_0"), val = bool(false)]; tensor x_343_cast_fp16 = concat(axis = var_5467, interleave = x_343_interleave_0, values = (x_341_cast_fp16, var_5470_cast_fp16))[name = string("x_343_cast_fp16")]; tensor out_229_axes_0 = const()[name = string("out_229_axes_0"), val = tensor([1])]; fp16 var_5480_to_fp16 = const()[name = string("op_5480_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_229_cast_fp16 = layer_norm(axes = out_229_axes_0, epsilon = var_5480_to_fp16, x = x_343_cast_fp16)[name = string("out_229_cast_fp16")]; tensor layer_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576371712)))]; tensor out_231_cast_fp16 = mul(x = out_229_cast_fp16, y = layer_layers_19_input_layernorm_weight_to_fp16)[name = string("out_231_cast_fp16")]; tensor var_5486_split_sizes_0 = const()[name = string("op_5486_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5486_axis_0 = const()[name = string("op_5486_axis_0"), val = int32(1)]; tensor var_5486_cast_fp16_0, tensor var_5486_cast_fp16_1 = split(axis = var_5486_axis_0, split_sizes = var_5486_split_sizes_0, x = out_231_cast_fp16)[name = string("op_5486_cast_fp16")]; string query_states_77_pad_type_0 = const()[name = string("query_states_77_pad_type_0"), val = string("valid")]; tensor query_states_77_strides_0 = const()[name = string("query_states_77_strides_0"), val = tensor([1, 1])]; tensor query_states_77_pad_0 = const()[name = string("query_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_77_dilations_0 = const()[name = string("query_states_77_dilations_0"), val = tensor([1, 1])]; int32 query_states_77_groups_0 = const()[name = string("query_states_77_groups_0"), val = int32(1)]; tensor var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576375872)))]; tensor query_states_77_cast_fp16 = conv(dilations = query_states_77_dilations_0, groups = query_states_77_groups_0, pad = query_states_77_pad_0, pad_type = query_states_77_pad_type_0, strides = query_states_77_strides_0, weight = var_5508_to_fp16, x = var_5486_cast_fp16_0)[name = string("query_states_77_cast_fp16")]; string key_states_77_pad_type_0 = const()[name = string("key_states_77_pad_type_0"), val = string("valid")]; tensor key_states_77_strides_0 = const()[name = string("key_states_77_strides_0"), val = tensor([1, 1])]; tensor key_states_77_pad_0 = const()[name = string("key_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_77_dilations_0 = const()[name = string("key_states_77_dilations_0"), val = tensor([1, 1])]; int32 key_states_77_groups_0 = const()[name = string("key_states_77_groups_0"), val = int32(1)]; tensor var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578473088)))]; tensor key_states_77_cast_fp16 = conv(dilations = key_states_77_dilations_0, groups = key_states_77_groups_0, pad = key_states_77_pad_0, pad_type = key_states_77_pad_type_0, strides = key_states_77_strides_0, weight = var_5519_to_fp16, x = var_5486_cast_fp16_0)[name = string("key_states_77_cast_fp16")]; string value_states_77_pad_type_0 = const()[name = string("value_states_77_pad_type_0"), val = string("valid")]; tensor value_states_77_strides_0 = const()[name = string("value_states_77_strides_0"), val = tensor([1, 1])]; tensor value_states_77_pad_0 = const()[name = string("value_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_77_dilations_0 = const()[name = string("value_states_77_dilations_0"), val = tensor([1, 1])]; int32 value_states_77_groups_0 = const()[name = string("value_states_77_groups_0"), val = int32(1)]; tensor var_5530_to_fp16 = const()[name = string("op_5530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578735296)))]; tensor value_states_77_cast_fp16 = conv(dilations = value_states_77_dilations_0, groups = value_states_77_groups_0, pad = value_states_77_pad_0, pad_type = value_states_77_pad_type_0, strides = value_states_77_strides_0, weight = var_5530_to_fp16, x = var_5486_cast_fp16_0)[name = string("value_states_77_cast_fp16")]; tensor var_5538 = const()[name = string("op_5538"), val = tensor([1, 16, 64, 1])]; tensor embed_77_cast_fp16 = reshape(shape = var_5538, x = query_states_77_cast_fp16)[name = string("embed_77_cast_fp16")]; tensor var_5542 = const()[name = string("op_5542"), val = tensor([1, 2, 64, 1])]; tensor var_5543_cast_fp16 = reshape(shape = var_5542, x = key_states_77_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor embed_79_perm_0 = const()[name = string("embed_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([1, 2, 64, 1])]; tensor var_5550_cast_fp16 = reshape(shape = var_5549, x = value_states_77_cast_fp16)[name = string("op_5550_cast_fp16")]; tensor value_states_79_perm_0 = const()[name = string("value_states_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5554_cast_fp16 = mul(x = embed_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5554_cast_fp16")]; tensor var_5555_split_sizes_0 = const()[name = string("op_5555_split_sizes_0"), val = tensor([32, 32])]; int32 var_5555_axis_0 = const()[name = string("op_5555_axis_0"), val = int32(-2)]; tensor var_5555_cast_fp16_0, tensor var_5555_cast_fp16_1 = split(axis = var_5555_axis_0, split_sizes = var_5555_split_sizes_0, x = embed_77_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 const_197_promoted_to_fp16 = const()[name = string("const_197_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5557_cast_fp16 = mul(x = var_5555_cast_fp16_1, y = const_197_promoted_to_fp16)[name = string("op_5557_cast_fp16")]; int32 var_5559 = const()[name = string("op_5559"), val = int32(-2)]; bool var_5560_interleave_0 = const()[name = string("op_5560_interleave_0"), val = bool(false)]; tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5557_cast_fp16, var_5555_cast_fp16_0))[name = string("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = mul(x = var_5560_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5561_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_5554_cast_fp16, y = var_5561_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor embed_79_cast_fp16 = transpose(perm = embed_79_perm_0, x = var_5543_cast_fp16)[name = string("transpose_14")]; tensor var_5564_cast_fp16 = mul(x = embed_79_cast_fp16, y = cos_cast_fp16)[name = string("op_5564_cast_fp16")]; tensor var_5565_split_sizes_0 = const()[name = string("op_5565_split_sizes_0"), val = tensor([32, 32])]; int32 var_5565_axis_0 = const()[name = string("op_5565_axis_0"), val = int32(-1)]; tensor var_5565_cast_fp16_0, tensor var_5565_cast_fp16_1 = split(axis = var_5565_axis_0, split_sizes = var_5565_split_sizes_0, x = embed_79_cast_fp16)[name = string("op_5565_cast_fp16")]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5567_cast_fp16 = mul(x = var_5565_cast_fp16_1, y = const_198_promoted_to_fp16)[name = string("op_5567_cast_fp16")]; int32 var_5569 = const()[name = string("op_5569"), val = int32(-1)]; bool var_5570_interleave_0 = const()[name = string("op_5570_interleave_0"), val = bool(false)]; tensor var_5570_cast_fp16 = concat(axis = var_5569, interleave = var_5570_interleave_0, values = (var_5567_cast_fp16, var_5565_cast_fp16_0))[name = string("op_5570_cast_fp16")]; tensor var_5571_cast_fp16 = mul(x = var_5570_cast_fp16, y = sin_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor key_states_79_cast_fp16 = add(x = var_5564_cast_fp16, y = var_5571_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor expand_dims_191 = const()[name = string("expand_dims_191"), val = tensor([19])]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([20])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_191, expand_dims_192, position_id, concat_155_values3_0))[name = string("concat_155")]; tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (expand_dims_194, concat_156_values1_0, var_426, concat_156_values3_0))[name = string("concat_156")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = key_states_79_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_204_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_204")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_79_cast_fp16 = transpose(perm = value_states_79_perm_0, x = var_5550_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = value_states_79_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_205_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_205")]; tensor var_5614_begin_0 = const()[name = string("op_5614_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5614_end_0 = const()[name = string("op_5614_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5614_end_mask_0 = const()[name = string("op_5614_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5614_cast_fp16 = slice_by_index(begin = var_5614_begin_0, end = var_5614_end_0, end_mask = var_5614_end_mask_0, x = coreml_update_state_86)[name = string("op_5614_cast_fp16")]; tensor tile_38 = const()[name = string("tile_38"), val = tensor([1, 1])]; int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(1)]; tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = tile_38, x = var_5614_cast_fp16)[name = string("op_5617_cast_fp16")]; tensor var_5624_begin_0 = const()[name = string("op_5624_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5624_end_0 = const()[name = string("op_5624_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5624_end_mask_0 = const()[name = string("op_5624_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = coreml_update_state_87)[name = string("op_5624_cast_fp16")]; tensor tile_39 = const()[name = string("tile_39"), val = tensor([1, 1])]; int32 var_5627_axis_0 = const()[name = string("op_5627_axis_0"), val = int32(1)]; tensor var_5627_cast_fp16_0, tensor var_5627_cast_fp16_1 = split(axis = var_5627_axis_0, split_sizes = tile_39, x = var_5624_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5630_split_sizes_0 = const()[name = string("op_5630_split_sizes_0"), val = tensor([8, 8])]; int32 var_5630_axis_0 = const()[name = string("op_5630_axis_0"), val = int32(1)]; tensor var_5630_cast_fp16_0, tensor var_5630_cast_fp16_1 = split(axis = var_5630_axis_0, split_sizes = var_5630_split_sizes_0, x = query_states_79_cast_fp16)[name = string("op_5630_cast_fp16")]; bool attn_weights_305_transpose_x_0 = const()[name = string("attn_weights_305_transpose_x_0"), val = bool(false)]; bool attn_weights_305_transpose_y_0 = const()[name = string("attn_weights_305_transpose_y_0"), val = bool(false)]; tensor attn_weights_305_cast_fp16 = matmul(transpose_x = attn_weights_305_transpose_x_0, transpose_y = attn_weights_305_transpose_y_0, x = var_5617_cast_fp16_0, y = var_5630_cast_fp16_0)[name = string("attn_weights_305_cast_fp16")]; fp16 _inversed_attn_weights_307_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_307_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_307_cast_fp16 = mul(x = attn_weights_305_cast_fp16, y = _inversed_attn_weights_307_y_0_to_fp16)[name = string("_inversed_attn_weights_307_cast_fp16")]; tensor attn_weights_309_cast_fp16 = add(x = _inversed_attn_weights_307_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_309_cast_fp16")]; int32 var_5637 = const()[name = string("op_5637"), val = int32(2)]; tensor attn_weights_311_cast_fp16 = softmax(axis = var_5637, x = attn_weights_309_cast_fp16)[name = string("attn_weights_311_cast_fp16")]; bool var_5643_transpose_x_1 = const()[name = string("op_5643_transpose_x_1"), val = bool(true)]; bool var_5643_transpose_y_1 = const()[name = string("op_5643_transpose_y_1"), val = bool(false)]; tensor var_5643_cast_fp16 = matmul(transpose_x = var_5643_transpose_x_1, transpose_y = var_5643_transpose_y_1, x = attn_weights_311_cast_fp16, y = var_5627_cast_fp16_0)[name = string("op_5643_cast_fp16")]; bool attn_weights_313_transpose_x_0 = const()[name = string("attn_weights_313_transpose_x_0"), val = bool(false)]; bool attn_weights_313_transpose_y_0 = const()[name = string("attn_weights_313_transpose_y_0"), val = bool(false)]; tensor attn_weights_313_cast_fp16 = matmul(transpose_x = attn_weights_313_transpose_x_0, transpose_y = attn_weights_313_transpose_y_0, x = var_5617_cast_fp16_1, y = var_5630_cast_fp16_1)[name = string("attn_weights_313_cast_fp16")]; fp16 _inversed_attn_weights_315_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_315_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_315_cast_fp16 = mul(x = attn_weights_313_cast_fp16, y = _inversed_attn_weights_315_y_0_to_fp16)[name = string("_inversed_attn_weights_315_cast_fp16")]; tensor attn_weights_317_cast_fp16 = add(x = _inversed_attn_weights_315_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_317_cast_fp16")]; int32 var_5649 = const()[name = string("op_5649"), val = int32(2)]; tensor attn_weights_319_cast_fp16 = softmax(axis = var_5649, x = attn_weights_317_cast_fp16)[name = string("attn_weights_319_cast_fp16")]; bool attn_output_115_transpose_x_1 = const()[name = string("attn_output_115_transpose_x_1"), val = bool(true)]; bool attn_output_115_transpose_y_1 = const()[name = string("attn_output_115_transpose_y_1"), val = bool(false)]; tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_1, transpose_y = attn_output_115_transpose_y_1, x = attn_weights_319_cast_fp16, y = var_5627_cast_fp16_1)[name = string("attn_output_115_cast_fp16")]; int32 var_5657 = const()[name = string("op_5657"), val = int32(1)]; bool attn_output_117_interleave_0 = const()[name = string("attn_output_117_interleave_0"), val = bool(false)]; tensor attn_output_117_cast_fp16 = concat(axis = var_5657, interleave = attn_output_117_interleave_0, values = (var_5643_cast_fp16, attn_output_115_cast_fp16))[name = string("attn_output_117_cast_fp16")]; tensor var_5661_perm_0 = const()[name = string("op_5661_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 1024, 1, 1])]; tensor var_5661_cast_fp16 = transpose(perm = var_5661_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_12")]; tensor x_347_cast_fp16 = reshape(shape = var_5666, x = var_5661_cast_fp16)[name = string("x_347_cast_fp16")]; string hidden_states_117_pad_type_0 = const()[name = string("hidden_states_117_pad_type_0"), val = string("valid")]; tensor hidden_states_117_strides_0 = const()[name = string("hidden_states_117_strides_0"), val = tensor([1, 1])]; tensor hidden_states_117_pad_0 = const()[name = string("hidden_states_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_117_dilations_0 = const()[name = string("hidden_states_117_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_117_groups_0 = const()[name = string("hidden_states_117_groups_0"), val = int32(1)]; tensor var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578997504)))]; tensor hidden_states_117_cast_fp16 = conv(dilations = hidden_states_117_dilations_0, groups = hidden_states_117_groups_0, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = hidden_states_117_strides_0, weight = var_5673_to_fp16, x = x_347_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor x_349_cast_fp16 = add(x = x_341_cast_fp16, y = hidden_states_117_cast_fp16)[name = string("x_349_cast_fp16")]; int32 var_5685 = const()[name = string("op_5685"), val = int32(1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5688_cast_fp16 = mul(x = x_349_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5688_cast_fp16")]; bool x_351_interleave_0 = const()[name = string("x_351_interleave_0"), val = bool(false)]; tensor x_351_cast_fp16 = concat(axis = var_5685, interleave = x_351_interleave_0, values = (x_349_cast_fp16, var_5688_cast_fp16))[name = string("x_351_cast_fp16")]; tensor out_235_axes_0 = const()[name = string("out_235_axes_0"), val = tensor([1])]; fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_235_cast_fp16 = layer_norm(axes = out_235_axes_0, epsilon = var_5698_to_fp16, x = x_351_cast_fp16)[name = string("out_235_cast_fp16")]; tensor layer_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581094720)))]; tensor out_237_cast_fp16 = mul(x = out_235_cast_fp16, y = layer_layers_19_post_attention_layernorm_weight_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(1)]; tensor var_5704_cast_fp16_0, tensor var_5704_cast_fp16_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = out_237_cast_fp16)[name = string("op_5704_cast_fp16")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581098880)))]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = var_5709_to_fp16, x = var_5704_cast_fp16_0)[name = string("input_39_cast_fp16")]; tensor var_5720_cast_fp16 = silu(x = input_39_cast_fp16)[name = string("op_5720_cast_fp16")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5708_to_fp16 = const()[name = string("op_5708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589487552)))]; tensor var_5725_cast_fp16 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = var_5708_to_fp16, x = var_5704_cast_fp16_0)[name = string("op_5725_cast_fp16")]; tensor x_357_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5725_cast_fp16)[name = string("x_357_cast_fp16")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor var_5707_to_fp16 = const()[name = string("op_5707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597876224)))]; tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = var_5707_to_fp16, x = x_357_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor x_359_cast_fp16 = add(x = x_349_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("x_359_cast_fp16")]; int32 var_5738 = const()[name = string("op_5738"), val = int32(1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5741_cast_fp16 = mul(x = x_359_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; bool x_361_interleave_0 = const()[name = string("x_361_interleave_0"), val = bool(false)]; tensor x_361_cast_fp16 = concat(axis = var_5738, interleave = x_361_interleave_0, values = (x_359_cast_fp16, var_5741_cast_fp16))[name = string("x_361_cast_fp16")]; tensor out_241_axes_0 = const()[name = string("out_241_axes_0"), val = tensor([1])]; fp16 var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_241_cast_fp16 = layer_norm(axes = out_241_axes_0, epsilon = var_5751_to_fp16, x = x_361_cast_fp16)[name = string("out_241_cast_fp16")]; tensor layer_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606264896)))]; tensor out_243_cast_fp16 = mul(x = out_241_cast_fp16, y = layer_layers_20_input_layernorm_weight_to_fp16)[name = string("out_243_cast_fp16")]; tensor var_5757_split_sizes_0 = const()[name = string("op_5757_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5757_axis_0 = const()[name = string("op_5757_axis_0"), val = int32(1)]; tensor var_5757_cast_fp16_0, tensor var_5757_cast_fp16_1 = split(axis = var_5757_axis_0, split_sizes = var_5757_split_sizes_0, x = out_243_cast_fp16)[name = string("op_5757_cast_fp16")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor var_5779_to_fp16 = const()[name = string("op_5779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606269056)))]; tensor query_states_81_cast_fp16 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = var_5779_to_fp16, x = var_5757_cast_fp16_0)[name = string("query_states_81_cast_fp16")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor var_5790_to_fp16 = const()[name = string("op_5790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608366272)))]; tensor key_states_81_cast_fp16 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = var_5790_to_fp16, x = var_5757_cast_fp16_0)[name = string("key_states_81_cast_fp16")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608628480)))]; tensor value_states_81_cast_fp16 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = var_5801_to_fp16, x = var_5757_cast_fp16_0)[name = string("value_states_81_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 16, 64, 1])]; tensor embed_81_cast_fp16 = reshape(shape = var_5809, x = query_states_81_cast_fp16)[name = string("embed_81_cast_fp16")]; tensor var_5813 = const()[name = string("op_5813"), val = tensor([1, 2, 64, 1])]; tensor var_5814_cast_fp16 = reshape(shape = var_5813, x = key_states_81_cast_fp16)[name = string("op_5814_cast_fp16")]; tensor embed_83_perm_0 = const()[name = string("embed_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, 2, 64, 1])]; tensor var_5821_cast_fp16 = reshape(shape = var_5820, x = value_states_81_cast_fp16)[name = string("op_5821_cast_fp16")]; tensor value_states_83_perm_0 = const()[name = string("value_states_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5825_cast_fp16 = mul(x = embed_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5825_cast_fp16")]; tensor var_5826_split_sizes_0 = const()[name = string("op_5826_split_sizes_0"), val = tensor([32, 32])]; int32 var_5826_axis_0 = const()[name = string("op_5826_axis_0"), val = int32(-2)]; tensor var_5826_cast_fp16_0, tensor var_5826_cast_fp16_1 = split(axis = var_5826_axis_0, split_sizes = var_5826_split_sizes_0, x = embed_81_cast_fp16)[name = string("op_5826_cast_fp16")]; fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5828_cast_fp16 = mul(x = var_5826_cast_fp16_1, y = const_207_promoted_to_fp16)[name = string("op_5828_cast_fp16")]; int32 var_5830 = const()[name = string("op_5830"), val = int32(-2)]; bool var_5831_interleave_0 = const()[name = string("op_5831_interleave_0"), val = bool(false)]; tensor var_5831_cast_fp16 = concat(axis = var_5830, interleave = var_5831_interleave_0, values = (var_5828_cast_fp16, var_5826_cast_fp16_0))[name = string("op_5831_cast_fp16")]; tensor var_5832_cast_fp16 = mul(x = var_5831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5832_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_5825_cast_fp16, y = var_5832_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor embed_83_cast_fp16 = transpose(perm = embed_83_perm_0, x = var_5814_cast_fp16)[name = string("transpose_11")]; tensor var_5835_cast_fp16 = mul(x = embed_83_cast_fp16, y = cos_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5836_split_sizes_0 = const()[name = string("op_5836_split_sizes_0"), val = tensor([32, 32])]; int32 var_5836_axis_0 = const()[name = string("op_5836_axis_0"), val = int32(-1)]; tensor var_5836_cast_fp16_0, tensor var_5836_cast_fp16_1 = split(axis = var_5836_axis_0, split_sizes = var_5836_split_sizes_0, x = embed_83_cast_fp16)[name = string("op_5836_cast_fp16")]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5838_cast_fp16 = mul(x = var_5836_cast_fp16_1, y = const_208_promoted_to_fp16)[name = string("op_5838_cast_fp16")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841_cast_fp16 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838_cast_fp16, var_5836_cast_fp16_0))[name = string("op_5841_cast_fp16")]; tensor var_5842_cast_fp16 = mul(x = var_5841_cast_fp16, y = sin_cast_fp16)[name = string("op_5842_cast_fp16")]; tensor key_states_83_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5842_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([20])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([0])]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([21])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_201, expand_dims_202, position_id, concat_163_values3_0))[name = string("concat_163")]; tensor concat_164_values1_0 = const()[name = string("concat_164_values1_0"), val = tensor([0])]; tensor concat_164_values3_0 = const()[name = string("concat_164_values3_0"), val = tensor([0])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_204, concat_164_values1_0, var_426, concat_164_values3_0))[name = string("concat_164")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_206_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_206")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83_cast_fp16 = transpose(perm = value_states_83_perm_0, x = var_5821_cast_fp16)[name = string("transpose_10")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = value_states_83_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_207_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_207")]; tensor var_5885_begin_0 = const()[name = string("op_5885_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5885_end_0 = const()[name = string("op_5885_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5885_end_mask_0 = const()[name = string("op_5885_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5885_cast_fp16 = slice_by_index(begin = var_5885_begin_0, end = var_5885_end_0, end_mask = var_5885_end_mask_0, x = coreml_update_state_88)[name = string("op_5885_cast_fp16")]; tensor tile_40 = const()[name = string("tile_40"), val = tensor([1, 1])]; int32 var_5888_axis_0 = const()[name = string("op_5888_axis_0"), val = int32(1)]; tensor var_5888_cast_fp16_0, tensor var_5888_cast_fp16_1 = split(axis = var_5888_axis_0, split_sizes = tile_40, x = var_5885_cast_fp16)[name = string("op_5888_cast_fp16")]; tensor var_5895_begin_0 = const()[name = string("op_5895_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5895_end_0 = const()[name = string("op_5895_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5895_end_mask_0 = const()[name = string("op_5895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = coreml_update_state_89)[name = string("op_5895_cast_fp16")]; tensor tile_41 = const()[name = string("tile_41"), val = tensor([1, 1])]; int32 var_5898_axis_0 = const()[name = string("op_5898_axis_0"), val = int32(1)]; tensor var_5898_cast_fp16_0, tensor var_5898_cast_fp16_1 = split(axis = var_5898_axis_0, split_sizes = tile_41, x = var_5895_cast_fp16)[name = string("op_5898_cast_fp16")]; tensor var_5901_split_sizes_0 = const()[name = string("op_5901_split_sizes_0"), val = tensor([8, 8])]; int32 var_5901_axis_0 = const()[name = string("op_5901_axis_0"), val = int32(1)]; tensor var_5901_cast_fp16_0, tensor var_5901_cast_fp16_1 = split(axis = var_5901_axis_0, split_sizes = var_5901_split_sizes_0, x = query_states_83_cast_fp16)[name = string("op_5901_cast_fp16")]; bool attn_weights_321_transpose_x_0 = const()[name = string("attn_weights_321_transpose_x_0"), val = bool(false)]; bool attn_weights_321_transpose_y_0 = const()[name = string("attn_weights_321_transpose_y_0"), val = bool(false)]; tensor attn_weights_321_cast_fp16 = matmul(transpose_x = attn_weights_321_transpose_x_0, transpose_y = attn_weights_321_transpose_y_0, x = var_5888_cast_fp16_0, y = var_5901_cast_fp16_0)[name = string("attn_weights_321_cast_fp16")]; fp16 _inversed_attn_weights_323_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_323_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_323_cast_fp16 = mul(x = attn_weights_321_cast_fp16, y = _inversed_attn_weights_323_y_0_to_fp16)[name = string("_inversed_attn_weights_323_cast_fp16")]; tensor attn_weights_325_cast_fp16 = add(x = _inversed_attn_weights_323_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_325_cast_fp16")]; int32 var_5908 = const()[name = string("op_5908"), val = int32(2)]; tensor attn_weights_327_cast_fp16 = softmax(axis = var_5908, x = attn_weights_325_cast_fp16)[name = string("attn_weights_327_cast_fp16")]; bool var_5914_transpose_x_1 = const()[name = string("op_5914_transpose_x_1"), val = bool(true)]; bool var_5914_transpose_y_1 = const()[name = string("op_5914_transpose_y_1"), val = bool(false)]; tensor var_5914_cast_fp16 = matmul(transpose_x = var_5914_transpose_x_1, transpose_y = var_5914_transpose_y_1, x = attn_weights_327_cast_fp16, y = var_5898_cast_fp16_0)[name = string("op_5914_cast_fp16")]; bool attn_weights_329_transpose_x_0 = const()[name = string("attn_weights_329_transpose_x_0"), val = bool(false)]; bool attn_weights_329_transpose_y_0 = const()[name = string("attn_weights_329_transpose_y_0"), val = bool(false)]; tensor attn_weights_329_cast_fp16 = matmul(transpose_x = attn_weights_329_transpose_x_0, transpose_y = attn_weights_329_transpose_y_0, x = var_5888_cast_fp16_1, y = var_5901_cast_fp16_1)[name = string("attn_weights_329_cast_fp16")]; fp16 _inversed_attn_weights_331_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_331_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_331_cast_fp16 = mul(x = attn_weights_329_cast_fp16, y = _inversed_attn_weights_331_y_0_to_fp16)[name = string("_inversed_attn_weights_331_cast_fp16")]; tensor attn_weights_333_cast_fp16 = add(x = _inversed_attn_weights_331_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_333_cast_fp16")]; int32 var_5920 = const()[name = string("op_5920"), val = int32(2)]; tensor attn_weights_335_cast_fp16 = softmax(axis = var_5920, x = attn_weights_333_cast_fp16)[name = string("attn_weights_335_cast_fp16")]; bool attn_output_121_transpose_x_1 = const()[name = string("attn_output_121_transpose_x_1"), val = bool(true)]; bool attn_output_121_transpose_y_1 = const()[name = string("attn_output_121_transpose_y_1"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_1, transpose_y = attn_output_121_transpose_y_1, x = attn_weights_335_cast_fp16, y = var_5898_cast_fp16_1)[name = string("attn_output_121_cast_fp16")]; int32 var_5928 = const()[name = string("op_5928"), val = int32(1)]; bool attn_output_123_interleave_0 = const()[name = string("attn_output_123_interleave_0"), val = bool(false)]; tensor attn_output_123_cast_fp16 = concat(axis = var_5928, interleave = attn_output_123_interleave_0, values = (var_5914_cast_fp16, attn_output_121_cast_fp16))[name = string("attn_output_123_cast_fp16")]; tensor var_5932_perm_0 = const()[name = string("op_5932_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5937 = const()[name = string("op_5937"), val = tensor([1, 1024, 1, 1])]; tensor var_5932_cast_fp16 = transpose(perm = var_5932_perm_0, x = attn_output_123_cast_fp16)[name = string("transpose_9")]; tensor x_365_cast_fp16 = reshape(shape = var_5937, x = var_5932_cast_fp16)[name = string("x_365_cast_fp16")]; string hidden_states_123_pad_type_0 = const()[name = string("hidden_states_123_pad_type_0"), val = string("valid")]; tensor hidden_states_123_strides_0 = const()[name = string("hidden_states_123_strides_0"), val = tensor([1, 1])]; tensor hidden_states_123_pad_0 = const()[name = string("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_123_dilations_0 = const()[name = string("hidden_states_123_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_123_groups_0 = const()[name = string("hidden_states_123_groups_0"), val = int32(1)]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608890688)))]; tensor hidden_states_123_cast_fp16 = conv(dilations = hidden_states_123_dilations_0, groups = hidden_states_123_groups_0, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = hidden_states_123_strides_0, weight = var_5944_to_fp16, x = x_365_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_359_cast_fp16, y = hidden_states_123_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(1)]; fp16 const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5959_cast_fp16 = mul(x = x_367_cast_fp16, y = const_213_promoted_to_fp16)[name = string("op_5959_cast_fp16")]; bool x_369_interleave_0 = const()[name = string("x_369_interleave_0"), val = bool(false)]; tensor x_369_cast_fp16 = concat(axis = var_5956, interleave = x_369_interleave_0, values = (x_367_cast_fp16, var_5959_cast_fp16))[name = string("x_369_cast_fp16")]; tensor out_247_axes_0 = const()[name = string("out_247_axes_0"), val = tensor([1])]; fp16 var_5969_to_fp16 = const()[name = string("op_5969_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_247_cast_fp16 = layer_norm(axes = out_247_axes_0, epsilon = var_5969_to_fp16, x = x_369_cast_fp16)[name = string("out_247_cast_fp16")]; tensor layer_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610987904)))]; tensor out_249_cast_fp16 = mul(x = out_247_cast_fp16, y = layer_layers_20_post_attention_layernorm_weight_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5975_split_sizes_0 = const()[name = string("op_5975_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5975_axis_0 = const()[name = string("op_5975_axis_0"), val = int32(1)]; tensor var_5975_cast_fp16_0, tensor var_5975_cast_fp16_1 = split(axis = var_5975_axis_0, split_sizes = var_5975_split_sizes_0, x = out_249_cast_fp16)[name = string("op_5975_cast_fp16")]; string input_41_pad_type_0 = const()[name = string("input_41_pad_type_0"), val = string("valid")]; tensor input_41_strides_0 = const()[name = string("input_41_strides_0"), val = tensor([1, 1])]; tensor input_41_pad_0 = const()[name = string("input_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_41_dilations_0 = const()[name = string("input_41_dilations_0"), val = tensor([1, 1])]; int32 input_41_groups_0 = const()[name = string("input_41_groups_0"), val = int32(1)]; tensor var_5980_to_fp16 = const()[name = string("op_5980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610992064)))]; tensor input_41_cast_fp16 = conv(dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = var_5980_to_fp16, x = var_5975_cast_fp16_0)[name = string("input_41_cast_fp16")]; tensor var_5991_cast_fp16 = silu(x = input_41_cast_fp16)[name = string("op_5991_cast_fp16")]; string var_5996_pad_type_0 = const()[name = string("op_5996_pad_type_0"), val = string("valid")]; tensor var_5996_strides_0 = const()[name = string("op_5996_strides_0"), val = tensor([1, 1])]; tensor var_5996_pad_0 = const()[name = string("op_5996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5996_dilations_0 = const()[name = string("op_5996_dilations_0"), val = tensor([1, 1])]; int32 var_5996_groups_0 = const()[name = string("op_5996_groups_0"), val = int32(1)]; tensor var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619380736)))]; tensor var_5996_cast_fp16 = conv(dilations = var_5996_dilations_0, groups = var_5996_groups_0, pad = var_5996_pad_0, pad_type = var_5996_pad_type_0, strides = var_5996_strides_0, weight = var_5979_to_fp16, x = var_5975_cast_fp16_0)[name = string("op_5996_cast_fp16")]; tensor x_375_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5996_cast_fp16)[name = string("x_375_cast_fp16")]; string hidden_states_125_pad_type_0 = const()[name = string("hidden_states_125_pad_type_0"), val = string("valid")]; tensor hidden_states_125_strides_0 = const()[name = string("hidden_states_125_strides_0"), val = tensor([1, 1])]; tensor hidden_states_125_pad_0 = const()[name = string("hidden_states_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_125_dilations_0 = const()[name = string("hidden_states_125_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_125_groups_0 = const()[name = string("hidden_states_125_groups_0"), val = int32(1)]; tensor var_5978_to_fp16 = const()[name = string("op_5978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627769408)))]; tensor hidden_states_125_cast_fp16 = conv(dilations = hidden_states_125_dilations_0, groups = hidden_states_125_groups_0, pad = hidden_states_125_pad_0, pad_type = hidden_states_125_pad_type_0, strides = hidden_states_125_strides_0, weight = var_5978_to_fp16, x = x_375_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = hidden_states_125_cast_fp16)[name = string("x_377_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6012_cast_fp16 = mul(x = x_377_cast_fp16, y = const_214_promoted_to_fp16)[name = string("op_6012_cast_fp16")]; bool x_379_interleave_0 = const()[name = string("x_379_interleave_0"), val = bool(false)]; tensor x_379_cast_fp16 = concat(axis = var_6009, interleave = x_379_interleave_0, values = (x_377_cast_fp16, var_6012_cast_fp16))[name = string("x_379_cast_fp16")]; tensor out_253_axes_0 = const()[name = string("out_253_axes_0"), val = tensor([1])]; fp16 var_6022_to_fp16 = const()[name = string("op_6022_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_253_cast_fp16 = layer_norm(axes = out_253_axes_0, epsilon = var_6022_to_fp16, x = x_379_cast_fp16)[name = string("out_253_cast_fp16")]; tensor layer_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636158080)))]; tensor out_255_cast_fp16 = mul(x = out_253_cast_fp16, y = layer_layers_21_input_layernorm_weight_to_fp16)[name = string("out_255_cast_fp16")]; tensor var_6028_split_sizes_0 = const()[name = string("op_6028_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6028_axis_0 = const()[name = string("op_6028_axis_0"), val = int32(1)]; tensor var_6028_cast_fp16_0, tensor var_6028_cast_fp16_1 = split(axis = var_6028_axis_0, split_sizes = var_6028_split_sizes_0, x = out_255_cast_fp16)[name = string("op_6028_cast_fp16")]; string query_states_85_pad_type_0 = const()[name = string("query_states_85_pad_type_0"), val = string("valid")]; tensor query_states_85_strides_0 = const()[name = string("query_states_85_strides_0"), val = tensor([1, 1])]; tensor query_states_85_pad_0 = const()[name = string("query_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_85_dilations_0 = const()[name = string("query_states_85_dilations_0"), val = tensor([1, 1])]; int32 query_states_85_groups_0 = const()[name = string("query_states_85_groups_0"), val = int32(1)]; tensor var_6050_to_fp16 = const()[name = string("op_6050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636162240)))]; tensor query_states_85_cast_fp16 = conv(dilations = query_states_85_dilations_0, groups = query_states_85_groups_0, pad = query_states_85_pad_0, pad_type = query_states_85_pad_type_0, strides = query_states_85_strides_0, weight = var_6050_to_fp16, x = var_6028_cast_fp16_0)[name = string("query_states_85_cast_fp16")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor var_6061_to_fp16 = const()[name = string("op_6061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638259456)))]; tensor key_states_85_cast_fp16 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = var_6061_to_fp16, x = var_6028_cast_fp16_0)[name = string("key_states_85_cast_fp16")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor var_6072_to_fp16 = const()[name = string("op_6072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638521664)))]; tensor value_states_85_cast_fp16 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = var_6072_to_fp16, x = var_6028_cast_fp16_0)[name = string("value_states_85_cast_fp16")]; tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 16, 64, 1])]; tensor embed_85_cast_fp16 = reshape(shape = var_6080, x = query_states_85_cast_fp16)[name = string("embed_85_cast_fp16")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([1, 2, 64, 1])]; tensor var_6085_cast_fp16 = reshape(shape = var_6084, x = key_states_85_cast_fp16)[name = string("op_6085_cast_fp16")]; tensor embed_87_perm_0 = const()[name = string("embed_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6091 = const()[name = string("op_6091"), val = tensor([1, 2, 64, 1])]; tensor var_6092_cast_fp16 = reshape(shape = var_6091, x = value_states_85_cast_fp16)[name = string("op_6092_cast_fp16")]; tensor value_states_87_perm_0 = const()[name = string("value_states_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6096_cast_fp16 = mul(x = embed_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6097_split_sizes_0 = const()[name = string("op_6097_split_sizes_0"), val = tensor([32, 32])]; int32 var_6097_axis_0 = const()[name = string("op_6097_axis_0"), val = int32(-2)]; tensor var_6097_cast_fp16_0, tensor var_6097_cast_fp16_1 = split(axis = var_6097_axis_0, split_sizes = var_6097_split_sizes_0, x = embed_85_cast_fp16)[name = string("op_6097_cast_fp16")]; fp16 const_217_promoted_to_fp16 = const()[name = string("const_217_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6099_cast_fp16 = mul(x = var_6097_cast_fp16_1, y = const_217_promoted_to_fp16)[name = string("op_6099_cast_fp16")]; int32 var_6101 = const()[name = string("op_6101"), val = int32(-2)]; bool var_6102_interleave_0 = const()[name = string("op_6102_interleave_0"), val = bool(false)]; tensor var_6102_cast_fp16 = concat(axis = var_6101, interleave = var_6102_interleave_0, values = (var_6099_cast_fp16, var_6097_cast_fp16_0))[name = string("op_6102_cast_fp16")]; tensor var_6103_cast_fp16 = mul(x = var_6102_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6103_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_6096_cast_fp16, y = var_6103_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor embed_87_cast_fp16 = transpose(perm = embed_87_perm_0, x = var_6085_cast_fp16)[name = string("transpose_8")]; tensor var_6106_cast_fp16 = mul(x = embed_87_cast_fp16, y = cos_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6107_split_sizes_0 = const()[name = string("op_6107_split_sizes_0"), val = tensor([32, 32])]; int32 var_6107_axis_0 = const()[name = string("op_6107_axis_0"), val = int32(-1)]; tensor var_6107_cast_fp16_0, tensor var_6107_cast_fp16_1 = split(axis = var_6107_axis_0, split_sizes = var_6107_split_sizes_0, x = embed_87_cast_fp16)[name = string("op_6107_cast_fp16")]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6109_cast_fp16 = mul(x = var_6107_cast_fp16_1, y = const_218_promoted_to_fp16)[name = string("op_6109_cast_fp16")]; int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; bool var_6112_interleave_0 = const()[name = string("op_6112_interleave_0"), val = bool(false)]; tensor var_6112_cast_fp16 = concat(axis = var_6111, interleave = var_6112_interleave_0, values = (var_6109_cast_fp16, var_6107_cast_fp16_0))[name = string("op_6112_cast_fp16")]; tensor var_6113_cast_fp16 = mul(x = var_6112_cast_fp16, y = sin_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor key_states_87_cast_fp16 = add(x = var_6106_cast_fp16, y = var_6113_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([21])]; tensor expand_dims_212 = const()[name = string("expand_dims_212"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([22])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_211, expand_dims_212, position_id, concat_171_values3_0))[name = string("concat_171")]; tensor concat_172_values1_0 = const()[name = string("concat_172_values1_0"), val = tensor([0])]; tensor concat_172_values3_0 = const()[name = string("concat_172_values3_0"), val = tensor([0])]; int32 concat_172_axis_0 = const()[name = string("concat_172_axis_0"), val = int32(0)]; bool concat_172_interleave_0 = const()[name = string("concat_172_interleave_0"), val = bool(false)]; tensor concat_172 = concat(axis = concat_172_axis_0, interleave = concat_172_interleave_0, values = (expand_dims_214, concat_172_values1_0, var_426, concat_172_values3_0))[name = string("concat_172")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = key_states_87_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_208_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_208")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87_cast_fp16 = transpose(perm = value_states_87_perm_0, x = var_6092_cast_fp16)[name = string("transpose_7")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = value_states_87_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_209_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_209")]; tensor var_6156_begin_0 = const()[name = string("op_6156_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6156_end_0 = const()[name = string("op_6156_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6156_end_mask_0 = const()[name = string("op_6156_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6156_cast_fp16 = slice_by_index(begin = var_6156_begin_0, end = var_6156_end_0, end_mask = var_6156_end_mask_0, x = coreml_update_state_90)[name = string("op_6156_cast_fp16")]; tensor tile_42 = const()[name = string("tile_42"), val = tensor([1, 1])]; int32 var_6159_axis_0 = const()[name = string("op_6159_axis_0"), val = int32(1)]; tensor var_6159_cast_fp16_0, tensor var_6159_cast_fp16_1 = split(axis = var_6159_axis_0, split_sizes = tile_42, x = var_6156_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor var_6166_begin_0 = const()[name = string("op_6166_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6166_end_0 = const()[name = string("op_6166_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6166_end_mask_0 = const()[name = string("op_6166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6166_cast_fp16 = slice_by_index(begin = var_6166_begin_0, end = var_6166_end_0, end_mask = var_6166_end_mask_0, x = coreml_update_state_91)[name = string("op_6166_cast_fp16")]; tensor tile_43 = const()[name = string("tile_43"), val = tensor([1, 1])]; int32 var_6169_axis_0 = const()[name = string("op_6169_axis_0"), val = int32(1)]; tensor var_6169_cast_fp16_0, tensor var_6169_cast_fp16_1 = split(axis = var_6169_axis_0, split_sizes = tile_43, x = var_6166_cast_fp16)[name = string("op_6169_cast_fp16")]; tensor var_6172_split_sizes_0 = const()[name = string("op_6172_split_sizes_0"), val = tensor([8, 8])]; int32 var_6172_axis_0 = const()[name = string("op_6172_axis_0"), val = int32(1)]; tensor var_6172_cast_fp16_0, tensor var_6172_cast_fp16_1 = split(axis = var_6172_axis_0, split_sizes = var_6172_split_sizes_0, x = query_states_87_cast_fp16)[name = string("op_6172_cast_fp16")]; bool attn_weights_337_transpose_x_0 = const()[name = string("attn_weights_337_transpose_x_0"), val = bool(false)]; bool attn_weights_337_transpose_y_0 = const()[name = string("attn_weights_337_transpose_y_0"), val = bool(false)]; tensor attn_weights_337_cast_fp16 = matmul(transpose_x = attn_weights_337_transpose_x_0, transpose_y = attn_weights_337_transpose_y_0, x = var_6159_cast_fp16_0, y = var_6172_cast_fp16_0)[name = string("attn_weights_337_cast_fp16")]; fp16 _inversed_attn_weights_339_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_339_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_339_cast_fp16 = mul(x = attn_weights_337_cast_fp16, y = _inversed_attn_weights_339_y_0_to_fp16)[name = string("_inversed_attn_weights_339_cast_fp16")]; tensor attn_weights_341_cast_fp16 = add(x = _inversed_attn_weights_339_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_341_cast_fp16")]; int32 var_6179 = const()[name = string("op_6179"), val = int32(2)]; tensor attn_weights_343_cast_fp16 = softmax(axis = var_6179, x = attn_weights_341_cast_fp16)[name = string("attn_weights_343_cast_fp16")]; bool var_6185_transpose_x_1 = const()[name = string("op_6185_transpose_x_1"), val = bool(true)]; bool var_6185_transpose_y_1 = const()[name = string("op_6185_transpose_y_1"), val = bool(false)]; tensor var_6185_cast_fp16 = matmul(transpose_x = var_6185_transpose_x_1, transpose_y = var_6185_transpose_y_1, x = attn_weights_343_cast_fp16, y = var_6169_cast_fp16_0)[name = string("op_6185_cast_fp16")]; bool attn_weights_345_transpose_x_0 = const()[name = string("attn_weights_345_transpose_x_0"), val = bool(false)]; bool attn_weights_345_transpose_y_0 = const()[name = string("attn_weights_345_transpose_y_0"), val = bool(false)]; tensor attn_weights_345_cast_fp16 = matmul(transpose_x = attn_weights_345_transpose_x_0, transpose_y = attn_weights_345_transpose_y_0, x = var_6159_cast_fp16_1, y = var_6172_cast_fp16_1)[name = string("attn_weights_345_cast_fp16")]; fp16 _inversed_attn_weights_347_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_347_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_347_cast_fp16 = mul(x = attn_weights_345_cast_fp16, y = _inversed_attn_weights_347_y_0_to_fp16)[name = string("_inversed_attn_weights_347_cast_fp16")]; tensor attn_weights_349_cast_fp16 = add(x = _inversed_attn_weights_347_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_349_cast_fp16")]; int32 var_6191 = const()[name = string("op_6191"), val = int32(2)]; tensor attn_weights_351_cast_fp16 = softmax(axis = var_6191, x = attn_weights_349_cast_fp16)[name = string("attn_weights_351_cast_fp16")]; bool attn_output_127_transpose_x_1 = const()[name = string("attn_output_127_transpose_x_1"), val = bool(true)]; bool attn_output_127_transpose_y_1 = const()[name = string("attn_output_127_transpose_y_1"), val = bool(false)]; tensor attn_output_127_cast_fp16 = matmul(transpose_x = attn_output_127_transpose_x_1, transpose_y = attn_output_127_transpose_y_1, x = attn_weights_351_cast_fp16, y = var_6169_cast_fp16_1)[name = string("attn_output_127_cast_fp16")]; int32 var_6199 = const()[name = string("op_6199"), val = int32(1)]; bool attn_output_129_interleave_0 = const()[name = string("attn_output_129_interleave_0"), val = bool(false)]; tensor attn_output_129_cast_fp16 = concat(axis = var_6199, interleave = attn_output_129_interleave_0, values = (var_6185_cast_fp16, attn_output_127_cast_fp16))[name = string("attn_output_129_cast_fp16")]; tensor var_6203_perm_0 = const()[name = string("op_6203_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 1024, 1, 1])]; tensor var_6203_cast_fp16 = transpose(perm = var_6203_perm_0, x = attn_output_129_cast_fp16)[name = string("transpose_6")]; tensor x_383_cast_fp16 = reshape(shape = var_6208, x = var_6203_cast_fp16)[name = string("x_383_cast_fp16")]; string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638783872)))]; tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = var_6215_to_fp16, x = x_383_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("x_385_cast_fp16")]; int32 var_6227 = const()[name = string("op_6227"), val = int32(1)]; fp16 const_223_promoted_to_fp16 = const()[name = string("const_223_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6230_cast_fp16 = mul(x = x_385_cast_fp16, y = const_223_promoted_to_fp16)[name = string("op_6230_cast_fp16")]; bool x_387_interleave_0 = const()[name = string("x_387_interleave_0"), val = bool(false)]; tensor x_387_cast_fp16 = concat(axis = var_6227, interleave = x_387_interleave_0, values = (x_385_cast_fp16, var_6230_cast_fp16))[name = string("x_387_cast_fp16")]; tensor out_259_axes_0 = const()[name = string("out_259_axes_0"), val = tensor([1])]; fp16 var_6240_to_fp16 = const()[name = string("op_6240_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_259_cast_fp16 = layer_norm(axes = out_259_axes_0, epsilon = var_6240_to_fp16, x = x_387_cast_fp16)[name = string("out_259_cast_fp16")]; tensor layer_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640881088)))]; tensor out_261_cast_fp16 = mul(x = out_259_cast_fp16, y = layer_layers_21_post_attention_layernorm_weight_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_6246_split_sizes_0 = const()[name = string("op_6246_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6246_axis_0 = const()[name = string("op_6246_axis_0"), val = int32(1)]; tensor var_6246_cast_fp16_0, tensor var_6246_cast_fp16_1 = split(axis = var_6246_axis_0, split_sizes = var_6246_split_sizes_0, x = out_261_cast_fp16)[name = string("op_6246_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640885248)))]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = var_6251_to_fp16, x = var_6246_cast_fp16_0)[name = string("input_43_cast_fp16")]; tensor var_6262_cast_fp16 = silu(x = input_43_cast_fp16)[name = string("op_6262_cast_fp16")]; string var_6267_pad_type_0 = const()[name = string("op_6267_pad_type_0"), val = string("valid")]; tensor var_6267_strides_0 = const()[name = string("op_6267_strides_0"), val = tensor([1, 1])]; tensor var_6267_pad_0 = const()[name = string("op_6267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6267_dilations_0 = const()[name = string("op_6267_dilations_0"), val = tensor([1, 1])]; int32 var_6267_groups_0 = const()[name = string("op_6267_groups_0"), val = int32(1)]; tensor var_6250_to_fp16 = const()[name = string("op_6250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649273920)))]; tensor var_6267_cast_fp16 = conv(dilations = var_6267_dilations_0, groups = var_6267_groups_0, pad = var_6267_pad_0, pad_type = var_6267_pad_type_0, strides = var_6267_strides_0, weight = var_6250_to_fp16, x = var_6246_cast_fp16_0)[name = string("op_6267_cast_fp16")]; tensor x_393_cast_fp16 = mul(x = var_6262_cast_fp16, y = var_6267_cast_fp16)[name = string("x_393_cast_fp16")]; string hidden_states_131_pad_type_0 = const()[name = string("hidden_states_131_pad_type_0"), val = string("valid")]; tensor hidden_states_131_strides_0 = const()[name = string("hidden_states_131_strides_0"), val = tensor([1, 1])]; tensor hidden_states_131_pad_0 = const()[name = string("hidden_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_131_dilations_0 = const()[name = string("hidden_states_131_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_131_groups_0 = const()[name = string("hidden_states_131_groups_0"), val = int32(1)]; tensor var_6249_to_fp16 = const()[name = string("op_6249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657662592)))]; tensor hidden_states_131_cast_fp16 = conv(dilations = hidden_states_131_dilations_0, groups = hidden_states_131_groups_0, pad = hidden_states_131_pad_0, pad_type = hidden_states_131_pad_type_0, strides = hidden_states_131_strides_0, weight = var_6249_to_fp16, x = x_393_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor x_395_cast_fp16 = add(x = x_385_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("x_395_cast_fp16")]; int32 var_6280 = const()[name = string("op_6280"), val = int32(1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6283_cast_fp16 = mul(x = x_395_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_6283_cast_fp16")]; bool x_397_interleave_0 = const()[name = string("x_397_interleave_0"), val = bool(false)]; tensor x_397_cast_fp16 = concat(axis = var_6280, interleave = x_397_interleave_0, values = (x_395_cast_fp16, var_6283_cast_fp16))[name = string("x_397_cast_fp16")]; tensor out_265_axes_0 = const()[name = string("out_265_axes_0"), val = tensor([1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_265_cast_fp16 = layer_norm(axes = out_265_axes_0, epsilon = var_6293_to_fp16, x = x_397_cast_fp16)[name = string("out_265_cast_fp16")]; tensor layer_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666051264)))]; tensor out_267_cast_fp16 = mul(x = out_265_cast_fp16, y = layer_layers_22_input_layernorm_weight_to_fp16)[name = string("out_267_cast_fp16")]; tensor var_6299_split_sizes_0 = const()[name = string("op_6299_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6299_axis_0 = const()[name = string("op_6299_axis_0"), val = int32(1)]; tensor var_6299_cast_fp16_0, tensor var_6299_cast_fp16_1 = split(axis = var_6299_axis_0, split_sizes = var_6299_split_sizes_0, x = out_267_cast_fp16)[name = string("op_6299_cast_fp16")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor var_6321_to_fp16 = const()[name = string("op_6321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666055424)))]; tensor query_states_89_cast_fp16 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = var_6321_to_fp16, x = var_6299_cast_fp16_0)[name = string("query_states_89_cast_fp16")]; string key_states_89_pad_type_0 = const()[name = string("key_states_89_pad_type_0"), val = string("valid")]; tensor key_states_89_strides_0 = const()[name = string("key_states_89_strides_0"), val = tensor([1, 1])]; tensor key_states_89_pad_0 = const()[name = string("key_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_89_dilations_0 = const()[name = string("key_states_89_dilations_0"), val = tensor([1, 1])]; int32 key_states_89_groups_0 = const()[name = string("key_states_89_groups_0"), val = int32(1)]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668152640)))]; tensor key_states_89_cast_fp16 = conv(dilations = key_states_89_dilations_0, groups = key_states_89_groups_0, pad = key_states_89_pad_0, pad_type = key_states_89_pad_type_0, strides = key_states_89_strides_0, weight = var_6332_to_fp16, x = var_6299_cast_fp16_0)[name = string("key_states_89_cast_fp16")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668414848)))]; tensor value_states_89_cast_fp16 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = var_6343_to_fp16, x = var_6299_cast_fp16_0)[name = string("value_states_89_cast_fp16")]; tensor var_6351 = const()[name = string("op_6351"), val = tensor([1, 16, 64, 1])]; tensor embed_89_cast_fp16 = reshape(shape = var_6351, x = query_states_89_cast_fp16)[name = string("embed_89_cast_fp16")]; tensor var_6355 = const()[name = string("op_6355"), val = tensor([1, 2, 64, 1])]; tensor var_6356_cast_fp16 = reshape(shape = var_6355, x = key_states_89_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor embed_91_perm_0 = const()[name = string("embed_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6362 = const()[name = string("op_6362"), val = tensor([1, 2, 64, 1])]; tensor var_6363_cast_fp16 = reshape(shape = var_6362, x = value_states_89_cast_fp16)[name = string("op_6363_cast_fp16")]; tensor value_states_91_perm_0 = const()[name = string("value_states_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6367_cast_fp16 = mul(x = embed_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor var_6368_split_sizes_0 = const()[name = string("op_6368_split_sizes_0"), val = tensor([32, 32])]; int32 var_6368_axis_0 = const()[name = string("op_6368_axis_0"), val = int32(-2)]; tensor var_6368_cast_fp16_0, tensor var_6368_cast_fp16_1 = split(axis = var_6368_axis_0, split_sizes = var_6368_split_sizes_0, x = embed_89_cast_fp16)[name = string("op_6368_cast_fp16")]; fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6370_cast_fp16 = mul(x = var_6368_cast_fp16_1, y = const_227_promoted_to_fp16)[name = string("op_6370_cast_fp16")]; int32 var_6372 = const()[name = string("op_6372"), val = int32(-2)]; bool var_6373_interleave_0 = const()[name = string("op_6373_interleave_0"), val = bool(false)]; tensor var_6373_cast_fp16 = concat(axis = var_6372, interleave = var_6373_interleave_0, values = (var_6370_cast_fp16, var_6368_cast_fp16_0))[name = string("op_6373_cast_fp16")]; tensor var_6374_cast_fp16 = mul(x = var_6373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6374_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_6367_cast_fp16, y = var_6374_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor embed_91_cast_fp16 = transpose(perm = embed_91_perm_0, x = var_6356_cast_fp16)[name = string("transpose_5")]; tensor var_6377_cast_fp16 = mul(x = embed_91_cast_fp16, y = cos_cast_fp16)[name = string("op_6377_cast_fp16")]; tensor var_6378_split_sizes_0 = const()[name = string("op_6378_split_sizes_0"), val = tensor([32, 32])]; int32 var_6378_axis_0 = const()[name = string("op_6378_axis_0"), val = int32(-1)]; tensor var_6378_cast_fp16_0, tensor var_6378_cast_fp16_1 = split(axis = var_6378_axis_0, split_sizes = var_6378_split_sizes_0, x = embed_91_cast_fp16)[name = string("op_6378_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6380_cast_fp16 = mul(x = var_6378_cast_fp16_1, y = const_228_promoted_to_fp16)[name = string("op_6380_cast_fp16")]; int32 var_6382 = const()[name = string("op_6382"), val = int32(-1)]; bool var_6383_interleave_0 = const()[name = string("op_6383_interleave_0"), val = bool(false)]; tensor var_6383_cast_fp16 = concat(axis = var_6382, interleave = var_6383_interleave_0, values = (var_6380_cast_fp16, var_6378_cast_fp16_0))[name = string("op_6383_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6383_cast_fp16, y = sin_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor key_states_91_cast_fp16 = add(x = var_6377_cast_fp16, y = var_6384_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor expand_dims_221 = const()[name = string("expand_dims_221"), val = tensor([22])]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([0])]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([23])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_221, expand_dims_222, position_id, concat_179_values3_0))[name = string("concat_179")]; tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (expand_dims_224, concat_180_values1_0, var_426, concat_180_values3_0))[name = string("concat_180")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = key_states_91_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_210_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_210")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91_cast_fp16 = transpose(perm = value_states_91_perm_0, x = var_6363_cast_fp16)[name = string("transpose_4")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = value_states_91_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_211_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_211")]; tensor var_6427_begin_0 = const()[name = string("op_6427_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6427_end_0 = const()[name = string("op_6427_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6427_end_mask_0 = const()[name = string("op_6427_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6427_cast_fp16 = slice_by_index(begin = var_6427_begin_0, end = var_6427_end_0, end_mask = var_6427_end_mask_0, x = coreml_update_state_92)[name = string("op_6427_cast_fp16")]; tensor tile_44 = const()[name = string("tile_44"), val = tensor([1, 1])]; int32 var_6430_axis_0 = const()[name = string("op_6430_axis_0"), val = int32(1)]; tensor var_6430_cast_fp16_0, tensor var_6430_cast_fp16_1 = split(axis = var_6430_axis_0, split_sizes = tile_44, x = var_6427_cast_fp16)[name = string("op_6430_cast_fp16")]; tensor var_6437_begin_0 = const()[name = string("op_6437_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6437_end_0 = const()[name = string("op_6437_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6437_end_mask_0 = const()[name = string("op_6437_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = coreml_update_state_93)[name = string("op_6437_cast_fp16")]; tensor tile_45 = const()[name = string("tile_45"), val = tensor([1, 1])]; int32 var_6440_axis_0 = const()[name = string("op_6440_axis_0"), val = int32(1)]; tensor var_6440_cast_fp16_0, tensor var_6440_cast_fp16_1 = split(axis = var_6440_axis_0, split_sizes = tile_45, x = var_6437_cast_fp16)[name = string("op_6440_cast_fp16")]; tensor var_6443_split_sizes_0 = const()[name = string("op_6443_split_sizes_0"), val = tensor([8, 8])]; int32 var_6443_axis_0 = const()[name = string("op_6443_axis_0"), val = int32(1)]; tensor var_6443_cast_fp16_0, tensor var_6443_cast_fp16_1 = split(axis = var_6443_axis_0, split_sizes = var_6443_split_sizes_0, x = query_states_91_cast_fp16)[name = string("op_6443_cast_fp16")]; bool attn_weights_353_transpose_x_0 = const()[name = string("attn_weights_353_transpose_x_0"), val = bool(false)]; bool attn_weights_353_transpose_y_0 = const()[name = string("attn_weights_353_transpose_y_0"), val = bool(false)]; tensor attn_weights_353_cast_fp16 = matmul(transpose_x = attn_weights_353_transpose_x_0, transpose_y = attn_weights_353_transpose_y_0, x = var_6430_cast_fp16_0, y = var_6443_cast_fp16_0)[name = string("attn_weights_353_cast_fp16")]; fp16 _inversed_attn_weights_355_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_355_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_355_cast_fp16 = mul(x = attn_weights_353_cast_fp16, y = _inversed_attn_weights_355_y_0_to_fp16)[name = string("_inversed_attn_weights_355_cast_fp16")]; tensor attn_weights_357_cast_fp16 = add(x = _inversed_attn_weights_355_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_357_cast_fp16")]; int32 var_6450 = const()[name = string("op_6450"), val = int32(2)]; tensor attn_weights_359_cast_fp16 = softmax(axis = var_6450, x = attn_weights_357_cast_fp16)[name = string("attn_weights_359_cast_fp16")]; bool var_6456_transpose_x_1 = const()[name = string("op_6456_transpose_x_1"), val = bool(true)]; bool var_6456_transpose_y_1 = const()[name = string("op_6456_transpose_y_1"), val = bool(false)]; tensor var_6456_cast_fp16 = matmul(transpose_x = var_6456_transpose_x_1, transpose_y = var_6456_transpose_y_1, x = attn_weights_359_cast_fp16, y = var_6440_cast_fp16_0)[name = string("op_6456_cast_fp16")]; bool attn_weights_361_transpose_x_0 = const()[name = string("attn_weights_361_transpose_x_0"), val = bool(false)]; bool attn_weights_361_transpose_y_0 = const()[name = string("attn_weights_361_transpose_y_0"), val = bool(false)]; tensor attn_weights_361_cast_fp16 = matmul(transpose_x = attn_weights_361_transpose_x_0, transpose_y = attn_weights_361_transpose_y_0, x = var_6430_cast_fp16_1, y = var_6443_cast_fp16_1)[name = string("attn_weights_361_cast_fp16")]; fp16 _inversed_attn_weights_363_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_363_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_363_cast_fp16 = mul(x = attn_weights_361_cast_fp16, y = _inversed_attn_weights_363_y_0_to_fp16)[name = string("_inversed_attn_weights_363_cast_fp16")]; tensor attn_weights_365_cast_fp16 = add(x = _inversed_attn_weights_363_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_365_cast_fp16")]; int32 var_6462 = const()[name = string("op_6462"), val = int32(2)]; tensor attn_weights_367_cast_fp16 = softmax(axis = var_6462, x = attn_weights_365_cast_fp16)[name = string("attn_weights_367_cast_fp16")]; bool attn_output_133_transpose_x_1 = const()[name = string("attn_output_133_transpose_x_1"), val = bool(true)]; bool attn_output_133_transpose_y_1 = const()[name = string("attn_output_133_transpose_y_1"), val = bool(false)]; tensor attn_output_133_cast_fp16 = matmul(transpose_x = attn_output_133_transpose_x_1, transpose_y = attn_output_133_transpose_y_1, x = attn_weights_367_cast_fp16, y = var_6440_cast_fp16_1)[name = string("attn_output_133_cast_fp16")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(1)]; bool attn_output_135_interleave_0 = const()[name = string("attn_output_135_interleave_0"), val = bool(false)]; tensor attn_output_135_cast_fp16 = concat(axis = var_6470, interleave = attn_output_135_interleave_0, values = (var_6456_cast_fp16, attn_output_133_cast_fp16))[name = string("attn_output_135_cast_fp16")]; tensor var_6474_perm_0 = const()[name = string("op_6474_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6479 = const()[name = string("op_6479"), val = tensor([1, 1024, 1, 1])]; tensor var_6474_cast_fp16 = transpose(perm = var_6474_perm_0, x = attn_output_135_cast_fp16)[name = string("transpose_3")]; tensor x_401_cast_fp16 = reshape(shape = var_6479, x = var_6474_cast_fp16)[name = string("x_401_cast_fp16")]; string hidden_states_135_pad_type_0 = const()[name = string("hidden_states_135_pad_type_0"), val = string("valid")]; tensor hidden_states_135_strides_0 = const()[name = string("hidden_states_135_strides_0"), val = tensor([1, 1])]; tensor hidden_states_135_pad_0 = const()[name = string("hidden_states_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_135_dilations_0 = const()[name = string("hidden_states_135_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_135_groups_0 = const()[name = string("hidden_states_135_groups_0"), val = int32(1)]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668677056)))]; tensor hidden_states_135_cast_fp16 = conv(dilations = hidden_states_135_dilations_0, groups = hidden_states_135_groups_0, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = hidden_states_135_strides_0, weight = var_6486_to_fp16, x = x_401_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor x_403_cast_fp16 = add(x = x_395_cast_fp16, y = hidden_states_135_cast_fp16)[name = string("x_403_cast_fp16")]; int32 var_6498 = const()[name = string("op_6498"), val = int32(1)]; fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6501_cast_fp16 = mul(x = x_403_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_6501_cast_fp16")]; bool x_405_interleave_0 = const()[name = string("x_405_interleave_0"), val = bool(false)]; tensor x_405_cast_fp16 = concat(axis = var_6498, interleave = x_405_interleave_0, values = (x_403_cast_fp16, var_6501_cast_fp16))[name = string("x_405_cast_fp16")]; tensor out_271_axes_0 = const()[name = string("out_271_axes_0"), val = tensor([1])]; fp16 var_6511_to_fp16 = const()[name = string("op_6511_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_271_cast_fp16 = layer_norm(axes = out_271_axes_0, epsilon = var_6511_to_fp16, x = x_405_cast_fp16)[name = string("out_271_cast_fp16")]; tensor layer_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670774272)))]; tensor out_273_cast_fp16 = mul(x = out_271_cast_fp16, y = layer_layers_22_post_attention_layernorm_weight_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_6517_split_sizes_0 = const()[name = string("op_6517_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6517_axis_0 = const()[name = string("op_6517_axis_0"), val = int32(1)]; tensor var_6517_cast_fp16_0, tensor var_6517_cast_fp16_1 = split(axis = var_6517_axis_0, split_sizes = var_6517_split_sizes_0, x = out_273_cast_fp16)[name = string("op_6517_cast_fp16")]; string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670778432)))]; tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = var_6522_to_fp16, x = var_6517_cast_fp16_0)[name = string("input_45_cast_fp16")]; tensor var_6533_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_6533_cast_fp16")]; string var_6538_pad_type_0 = const()[name = string("op_6538_pad_type_0"), val = string("valid")]; tensor var_6538_strides_0 = const()[name = string("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = string("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = string("op_6538_dilations_0"), val = tensor([1, 1])]; int32 var_6538_groups_0 = const()[name = string("op_6538_groups_0"), val = int32(1)]; tensor var_6521_to_fp16 = const()[name = string("op_6521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679167104)))]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = var_6521_to_fp16, x = var_6517_cast_fp16_0)[name = string("op_6538_cast_fp16")]; tensor x_411_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6538_cast_fp16)[name = string("x_411_cast_fp16")]; string hidden_states_137_pad_type_0 = const()[name = string("hidden_states_137_pad_type_0"), val = string("valid")]; tensor hidden_states_137_strides_0 = const()[name = string("hidden_states_137_strides_0"), val = tensor([1, 1])]; tensor hidden_states_137_pad_0 = const()[name = string("hidden_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_137_dilations_0 = const()[name = string("hidden_states_137_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_137_groups_0 = const()[name = string("hidden_states_137_groups_0"), val = int32(1)]; tensor var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687555776)))]; tensor hidden_states_137_cast_fp16 = conv(dilations = hidden_states_137_dilations_0, groups = hidden_states_137_groups_0, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = hidden_states_137_strides_0, weight = var_6520_to_fp16, x = x_411_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor x_413_cast_fp16 = add(x = x_403_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("x_413_cast_fp16")]; int32 var_6551 = const()[name = string("op_6551"), val = int32(1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6554_cast_fp16 = mul(x = x_413_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; bool x_415_interleave_0 = const()[name = string("x_415_interleave_0"), val = bool(false)]; tensor x_415_cast_fp16 = concat(axis = var_6551, interleave = x_415_interleave_0, values = (x_413_cast_fp16, var_6554_cast_fp16))[name = string("x_415_cast_fp16")]; tensor out_277_axes_0 = const()[name = string("out_277_axes_0"), val = tensor([1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_277_cast_fp16 = layer_norm(axes = out_277_axes_0, epsilon = var_6564_to_fp16, x = x_415_cast_fp16)[name = string("out_277_cast_fp16")]; tensor layer_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695944448)))]; tensor out_279_cast_fp16 = mul(x = out_277_cast_fp16, y = layer_layers_23_input_layernorm_weight_to_fp16)[name = string("out_279_cast_fp16")]; tensor var_6570_split_sizes_0 = const()[name = string("op_6570_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6570_axis_0 = const()[name = string("op_6570_axis_0"), val = int32(1)]; tensor var_6570_cast_fp16_0, tensor var_6570_cast_fp16_1 = split(axis = var_6570_axis_0, split_sizes = var_6570_split_sizes_0, x = out_279_cast_fp16)[name = string("op_6570_cast_fp16")]; string query_states_93_pad_type_0 = const()[name = string("query_states_93_pad_type_0"), val = string("valid")]; tensor query_states_93_strides_0 = const()[name = string("query_states_93_strides_0"), val = tensor([1, 1])]; tensor query_states_93_pad_0 = const()[name = string("query_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_93_dilations_0 = const()[name = string("query_states_93_dilations_0"), val = tensor([1, 1])]; int32 query_states_93_groups_0 = const()[name = string("query_states_93_groups_0"), val = int32(1)]; tensor var_6592_to_fp16 = const()[name = string("op_6592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695948608)))]; tensor query_states_93_cast_fp16 = conv(dilations = query_states_93_dilations_0, groups = query_states_93_groups_0, pad = query_states_93_pad_0, pad_type = query_states_93_pad_type_0, strides = query_states_93_strides_0, weight = var_6592_to_fp16, x = var_6570_cast_fp16_0)[name = string("query_states_93_cast_fp16")]; string key_states_93_pad_type_0 = const()[name = string("key_states_93_pad_type_0"), val = string("valid")]; tensor key_states_93_strides_0 = const()[name = string("key_states_93_strides_0"), val = tensor([1, 1])]; tensor key_states_93_pad_0 = const()[name = string("key_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_93_dilations_0 = const()[name = string("key_states_93_dilations_0"), val = tensor([1, 1])]; int32 key_states_93_groups_0 = const()[name = string("key_states_93_groups_0"), val = int32(1)]; tensor var_6603_to_fp16 = const()[name = string("op_6603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698045824)))]; tensor key_states_93_cast_fp16 = conv(dilations = key_states_93_dilations_0, groups = key_states_93_groups_0, pad = key_states_93_pad_0, pad_type = key_states_93_pad_type_0, strides = key_states_93_strides_0, weight = var_6603_to_fp16, x = var_6570_cast_fp16_0)[name = string("key_states_93_cast_fp16")]; string value_states_93_pad_type_0 = const()[name = string("value_states_93_pad_type_0"), val = string("valid")]; tensor value_states_93_strides_0 = const()[name = string("value_states_93_strides_0"), val = tensor([1, 1])]; tensor value_states_93_pad_0 = const()[name = string("value_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_93_dilations_0 = const()[name = string("value_states_93_dilations_0"), val = tensor([1, 1])]; int32 value_states_93_groups_0 = const()[name = string("value_states_93_groups_0"), val = int32(1)]; tensor var_6614_to_fp16 = const()[name = string("op_6614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698308032)))]; tensor value_states_93_cast_fp16 = conv(dilations = value_states_93_dilations_0, groups = value_states_93_groups_0, pad = value_states_93_pad_0, pad_type = value_states_93_pad_type_0, strides = value_states_93_strides_0, weight = var_6614_to_fp16, x = var_6570_cast_fp16_0)[name = string("value_states_93_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([1, 16, 64, 1])]; tensor embed_93_cast_fp16 = reshape(shape = var_6622, x = query_states_93_cast_fp16)[name = string("embed_93_cast_fp16")]; tensor var_6626 = const()[name = string("op_6626"), val = tensor([1, 2, 64, 1])]; tensor var_6627_cast_fp16 = reshape(shape = var_6626, x = key_states_93_cast_fp16)[name = string("op_6627_cast_fp16")]; tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([1, 2, 64, 1])]; tensor var_6634_cast_fp16 = reshape(shape = var_6633, x = value_states_93_cast_fp16)[name = string("op_6634_cast_fp16")]; tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6638_cast_fp16 = mul(x = embed_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6638_cast_fp16")]; tensor var_6639_split_sizes_0 = const()[name = string("op_6639_split_sizes_0"), val = tensor([32, 32])]; int32 var_6639_axis_0 = const()[name = string("op_6639_axis_0"), val = int32(-2)]; tensor var_6639_cast_fp16_0, tensor var_6639_cast_fp16_1 = split(axis = var_6639_axis_0, split_sizes = var_6639_split_sizes_0, x = embed_93_cast_fp16)[name = string("op_6639_cast_fp16")]; fp16 const_237_promoted_to_fp16 = const()[name = string("const_237_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6641_cast_fp16 = mul(x = var_6639_cast_fp16_1, y = const_237_promoted_to_fp16)[name = string("op_6641_cast_fp16")]; int32 var_6643 = const()[name = string("op_6643"), val = int32(-2)]; bool var_6644_interleave_0 = const()[name = string("op_6644_interleave_0"), val = bool(false)]; tensor var_6644_cast_fp16 = concat(axis = var_6643, interleave = var_6644_interleave_0, values = (var_6641_cast_fp16, var_6639_cast_fp16_0))[name = string("op_6644_cast_fp16")]; tensor var_6645_cast_fp16 = mul(x = var_6644_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6645_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_6638_cast_fp16, y = var_6645_cast_fp16)[name = string("query_states_cast_fp16")]; tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_6627_cast_fp16)[name = string("transpose_2")]; tensor var_6648_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_6648_cast_fp16")]; tensor var_6649_split_sizes_0 = const()[name = string("op_6649_split_sizes_0"), val = tensor([32, 32])]; int32 var_6649_axis_0 = const()[name = string("op_6649_axis_0"), val = int32(-1)]; tensor var_6649_cast_fp16_0, tensor var_6649_cast_fp16_1 = split(axis = var_6649_axis_0, split_sizes = var_6649_split_sizes_0, x = embed_cast_fp16)[name = string("op_6649_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6651_cast_fp16 = mul(x = var_6649_cast_fp16_1, y = const_238_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; int32 var_6653 = const()[name = string("op_6653"), val = int32(-1)]; bool var_6654_interleave_0 = const()[name = string("op_6654_interleave_0"), val = bool(false)]; tensor var_6654_cast_fp16 = concat(axis = var_6653, interleave = var_6654_interleave_0, values = (var_6651_cast_fp16, var_6649_cast_fp16_0))[name = string("op_6654_cast_fp16")]; tensor var_6655_cast_fp16 = mul(x = var_6654_cast_fp16, y = sin_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor key_states_cast_fp16 = add(x = var_6648_cast_fp16, y = var_6655_cast_fp16)[name = string("key_states_cast_fp16")]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([23])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([0])]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([24])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_231, expand_dims_232, position_id, concat_187_values3_0))[name = string("concat_187")]; tensor concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = tensor([0])]; tensor concat_188_values3_0 = const()[name = string("concat_188_values3_0"), val = tensor([0])]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (expand_dims_234, concat_188_values1_0, var_426, concat_188_values3_0))[name = string("concat_188")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = key_states_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_212_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_212")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_6634_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = value_states_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_213_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_213")]; tensor var_6698_begin_0 = const()[name = string("op_6698_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6698_end_0 = const()[name = string("op_6698_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6698_end_mask_0 = const()[name = string("op_6698_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6698_cast_fp16 = slice_by_index(begin = var_6698_begin_0, end = var_6698_end_0, end_mask = var_6698_end_mask_0, x = coreml_update_state_94)[name = string("op_6698_cast_fp16")]; tensor tile_46 = const()[name = string("tile_46"), val = tensor([1, 1])]; int32 var_6701_axis_0 = const()[name = string("op_6701_axis_0"), val = int32(1)]; tensor var_6701_cast_fp16_0, tensor var_6701_cast_fp16_1 = split(axis = var_6701_axis_0, split_sizes = tile_46, x = var_6698_cast_fp16)[name = string("op_6701_cast_fp16")]; tensor var_6708_begin_0 = const()[name = string("op_6708_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6708_end_0 = const()[name = string("op_6708_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6708_end_mask_0 = const()[name = string("op_6708_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6708_cast_fp16 = slice_by_index(begin = var_6708_begin_0, end = var_6708_end_0, end_mask = var_6708_end_mask_0, x = coreml_update_state_95)[name = string("op_6708_cast_fp16")]; tensor tile_47 = const()[name = string("tile_47"), val = tensor([1, 1])]; int32 var_6711_axis_0 = const()[name = string("op_6711_axis_0"), val = int32(1)]; tensor var_6711_cast_fp16_0, tensor var_6711_cast_fp16_1 = split(axis = var_6711_axis_0, split_sizes = tile_47, x = var_6708_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor var_6714_split_sizes_0 = const()[name = string("op_6714_split_sizes_0"), val = tensor([8, 8])]; int32 var_6714_axis_0 = const()[name = string("op_6714_axis_0"), val = int32(1)]; tensor var_6714_cast_fp16_0, tensor var_6714_cast_fp16_1 = split(axis = var_6714_axis_0, split_sizes = var_6714_split_sizes_0, x = query_states_cast_fp16)[name = string("op_6714_cast_fp16")]; bool attn_weights_369_transpose_x_0 = const()[name = string("attn_weights_369_transpose_x_0"), val = bool(false)]; bool attn_weights_369_transpose_y_0 = const()[name = string("attn_weights_369_transpose_y_0"), val = bool(false)]; tensor attn_weights_369_cast_fp16 = matmul(transpose_x = attn_weights_369_transpose_x_0, transpose_y = attn_weights_369_transpose_y_0, x = var_6701_cast_fp16_0, y = var_6714_cast_fp16_0)[name = string("attn_weights_369_cast_fp16")]; fp16 _inversed_attn_weights_371_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_371_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_371_cast_fp16 = mul(x = attn_weights_369_cast_fp16, y = _inversed_attn_weights_371_y_0_to_fp16)[name = string("_inversed_attn_weights_371_cast_fp16")]; tensor attn_weights_373_cast_fp16 = add(x = _inversed_attn_weights_371_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_373_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(2)]; tensor attn_weights_375_cast_fp16 = softmax(axis = var_6721, x = attn_weights_373_cast_fp16)[name = string("attn_weights_375_cast_fp16")]; bool var_6727_transpose_x_1 = const()[name = string("op_6727_transpose_x_1"), val = bool(true)]; bool var_6727_transpose_y_1 = const()[name = string("op_6727_transpose_y_1"), val = bool(false)]; tensor var_6727_cast_fp16 = matmul(transpose_x = var_6727_transpose_x_1, transpose_y = var_6727_transpose_y_1, x = attn_weights_375_cast_fp16, y = var_6711_cast_fp16_0)[name = string("op_6727_cast_fp16")]; bool attn_weights_377_transpose_x_0 = const()[name = string("attn_weights_377_transpose_x_0"), val = bool(false)]; bool attn_weights_377_transpose_y_0 = const()[name = string("attn_weights_377_transpose_y_0"), val = bool(false)]; tensor attn_weights_377_cast_fp16 = matmul(transpose_x = attn_weights_377_transpose_x_0, transpose_y = attn_weights_377_transpose_y_0, x = var_6701_cast_fp16_1, y = var_6714_cast_fp16_1)[name = string("attn_weights_377_cast_fp16")]; fp16 _inversed_attn_weights_379_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_379_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_379_cast_fp16 = mul(x = attn_weights_377_cast_fp16, y = _inversed_attn_weights_379_y_0_to_fp16)[name = string("_inversed_attn_weights_379_cast_fp16")]; tensor attn_weights_381_cast_fp16 = add(x = _inversed_attn_weights_379_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_381_cast_fp16")]; int32 var_6733 = const()[name = string("op_6733"), val = int32(2)]; tensor attn_weights_cast_fp16 = softmax(axis = var_6733, x = attn_weights_381_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_139_transpose_x_1 = const()[name = string("attn_output_139_transpose_x_1"), val = bool(true)]; bool attn_output_139_transpose_y_1 = const()[name = string("attn_output_139_transpose_y_1"), val = bool(false)]; tensor attn_output_139_cast_fp16 = matmul(transpose_x = attn_output_139_transpose_x_1, transpose_y = attn_output_139_transpose_y_1, x = attn_weights_cast_fp16, y = var_6711_cast_fp16_1)[name = string("attn_output_139_cast_fp16")]; int32 var_6741 = const()[name = string("op_6741"), val = int32(1)]; bool attn_output_141_interleave_0 = const()[name = string("attn_output_141_interleave_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = concat(axis = var_6741, interleave = attn_output_141_interleave_0, values = (var_6727_cast_fp16, attn_output_139_cast_fp16))[name = string("attn_output_141_cast_fp16")]; tensor var_6745_perm_0 = const()[name = string("op_6745_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 1024, 1, 1])]; tensor var_6745_cast_fp16 = transpose(perm = var_6745_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_0")]; tensor x_419_cast_fp16 = reshape(shape = var_6750, x = var_6745_cast_fp16)[name = string("x_419_cast_fp16")]; string hidden_states_141_pad_type_0 = const()[name = string("hidden_states_141_pad_type_0"), val = string("valid")]; tensor hidden_states_141_strides_0 = const()[name = string("hidden_states_141_strides_0"), val = tensor([1, 1])]; tensor hidden_states_141_pad_0 = const()[name = string("hidden_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_141_dilations_0 = const()[name = string("hidden_states_141_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_141_groups_0 = const()[name = string("hidden_states_141_groups_0"), val = int32(1)]; tensor var_6757_to_fp16 = const()[name = string("op_6757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698570240)))]; tensor hidden_states_141_cast_fp16 = conv(dilations = hidden_states_141_dilations_0, groups = hidden_states_141_groups_0, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = hidden_states_141_strides_0, weight = var_6757_to_fp16, x = x_419_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor x_421_cast_fp16 = add(x = x_413_cast_fp16, y = hidden_states_141_cast_fp16)[name = string("x_421_cast_fp16")]; int32 var_6769 = const()[name = string("op_6769"), val = int32(1)]; fp16 const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6772_cast_fp16 = mul(x = x_421_cast_fp16, y = const_243_promoted_to_fp16)[name = string("op_6772_cast_fp16")]; bool x_423_interleave_0 = const()[name = string("x_423_interleave_0"), val = bool(false)]; tensor x_423_cast_fp16 = concat(axis = var_6769, interleave = x_423_interleave_0, values = (x_421_cast_fp16, var_6772_cast_fp16))[name = string("x_423_cast_fp16")]; tensor out_283_axes_0 = const()[name = string("out_283_axes_0"), val = tensor([1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_283_cast_fp16 = layer_norm(axes = out_283_axes_0, epsilon = var_6782_to_fp16, x = x_423_cast_fp16)[name = string("out_283_cast_fp16")]; tensor layer_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700667456)))]; tensor out_285_cast_fp16 = mul(x = out_283_cast_fp16, y = layer_layers_23_post_attention_layernorm_weight_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_6788_split_sizes_0 = const()[name = string("op_6788_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6788_axis_0 = const()[name = string("op_6788_axis_0"), val = int32(1)]; tensor var_6788_cast_fp16_0, tensor var_6788_cast_fp16_1 = split(axis = var_6788_axis_0, split_sizes = var_6788_split_sizes_0, x = out_285_cast_fp16)[name = string("op_6788_cast_fp16")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor var_6793_to_fp16 = const()[name = string("op_6793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700671616)))]; tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_6793_to_fp16, x = var_6788_cast_fp16_0)[name = string("input_cast_fp16")]; tensor var_6804_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_6804_cast_fp16")]; string var_6809_pad_type_0 = const()[name = string("op_6809_pad_type_0"), val = string("valid")]; tensor var_6809_strides_0 = const()[name = string("op_6809_strides_0"), val = tensor([1, 1])]; tensor var_6809_pad_0 = const()[name = string("op_6809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6809_dilations_0 = const()[name = string("op_6809_dilations_0"), val = tensor([1, 1])]; int32 var_6809_groups_0 = const()[name = string("op_6809_groups_0"), val = int32(1)]; tensor var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709060288)))]; tensor var_6809_cast_fp16 = conv(dilations = var_6809_dilations_0, groups = var_6809_groups_0, pad = var_6809_pad_0, pad_type = var_6809_pad_type_0, strides = var_6809_strides_0, weight = var_6792_to_fp16, x = var_6788_cast_fp16_0)[name = string("op_6809_cast_fp16")]; tensor x_429_cast_fp16 = mul(x = var_6804_cast_fp16, y = var_6809_cast_fp16)[name = string("x_429_cast_fp16")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor var_6791_to_fp16 = const()[name = string("op_6791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717448960)))]; tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_6791_to_fp16, x = x_429_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor x_431_cast_fp16 = add(x = x_421_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_431_cast_fp16")]; int32 var_6822 = const()[name = string("op_6822"), val = int32(1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6825_cast_fp16 = mul(x = x_431_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_6825_cast_fp16")]; bool x_433_interleave_0 = const()[name = string("x_433_interleave_0"), val = bool(false)]; tensor x_433_cast_fp16 = concat(axis = var_6822, interleave = x_433_interleave_0, values = (x_431_cast_fp16, var_6825_cast_fp16))[name = string("x_433_cast_fp16")]; tensor out_289_axes_0 = const()[name = string("out_289_axes_0"), val = tensor([1])]; fp16 var_6835_to_fp16 = const()[name = string("op_6835_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_289_cast_fp16 = layer_norm(axes = out_289_axes_0, epsilon = var_6835_to_fp16, x = x_433_cast_fp16)[name = string("out_289_cast_fp16")]; tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725837632)))]; tensor out_291_cast_fp16 = mul(x = out_289_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_291_cast_fp16")]; tensor var_6841_split_sizes_0 = const()[name = string("op_6841_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6841_axis_0 = const()[name = string("op_6841_axis_0"), val = int32(1)]; tensor output, tensor var_6841_cast_fp16_1 = split(axis = var_6841_axis_0, split_sizes = var_6841_split_sizes_0, x = out_291_cast_fp16)[name = string("op_6841_cast_fp16")]; } -> (output); func length_128(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { tensor var_260 = const()[name = string("op_260"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725841792)))]; tensor position_ids_1 = add(x = var_260, y = position_id)[name = string("position_ids_1")]; int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; tensor var_285 = const()[name = string("op_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; tensor var_292_axes_0 = const()[name = string("op_292_axes_0"), val = tensor([2])]; tensor var_292 = expand_dims(axes = var_292_axes_0, x = position_ids_1)[name = string("op_292")]; tensor var_293 = greater(x = var_285, y = var_292)[name = string("op_293")]; tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_293_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_293)[name = string("cast_245")]; tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_293_to_fp16)[name = string("attention_mask_3_cast_fp16")]; fp16 var_301_promoted_to_fp16 = const()[name = string("op_301_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_302_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_301_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_303_after_broadcast_to_fp16 = const()[name = string("op_303_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725842368)))]; tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_303_after_broadcast_to_fp16, cond = var_302_cast_fp16)[name = string("attention_mask_cast_fp16")]; tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; int32 var_318 = const()[name = string("op_318"), val = int32(1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_321_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_321_cast_fp16")]; bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; tensor x_1_cast_fp16 = concat(axis = var_318, interleave = x_1_interleave_0, values = (inputs_embeds, var_321_cast_fp16))[name = string("x_1_cast_fp16")]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_331_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)]; tensor var_337_cast_fp16_0, tensor var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = out_3_cast_fp16)[name = string("op_337_cast_fp16")]; tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([128])]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor var_359_to_fp16 = const()[name = string("op_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_359_to_fp16, x = var_337_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor var_370_to_fp16 = const()[name = string("op_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_370_to_fp16, x = var_337_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_381_to_fp16, x = var_337_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 16, 64, 128])]; tensor embed_1_cast_fp16 = reshape(shape = var_389, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 2, 64, 128])]; tensor var_394_cast_fp16 = reshape(shape = var_393, x = key_states_1_cast_fp16)[name = string("op_394_cast_fp16")]; tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_400 = const()[name = string("op_400"), val = tensor([1, 2, 64, 128])]; tensor var_401_cast_fp16 = reshape(shape = var_400, x = value_states_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_74")]; tensor var_405_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_405_cast_fp16")]; tensor var_406_split_sizes_0 = const()[name = string("op_406_split_sizes_0"), val = tensor([32, 32])]; int32 var_406_axis_0 = const()[name = string("op_406_axis_0"), val = int32(-2)]; tensor var_406_cast_fp16_0, tensor var_406_cast_fp16_1 = split(axis = var_406_axis_0, split_sizes = var_406_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_406_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_408_cast_fp16 = mul(x = var_406_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_408_cast_fp16")]; int32 var_410 = const()[name = string("op_410"), val = int32(-2)]; bool var_411_interleave_0 = const()[name = string("op_411_interleave_0"), val = bool(false)]; tensor var_411_cast_fp16 = concat(axis = var_410, interleave = var_411_interleave_0, values = (var_408_cast_fp16, var_406_cast_fp16_0))[name = string("op_411_cast_fp16")]; tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_73")]; tensor var_412_cast_fp16 = mul(x = var_411_cast_fp16, y = sin_1_cast_fp16)[name = string("op_412_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_405_cast_fp16, y = var_412_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_394_cast_fp16)[name = string("transpose_71")]; tensor var_415_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_split_sizes_0 = const()[name = string("op_416_split_sizes_0"), val = tensor([32, 32])]; int32 var_416_axis_0 = const()[name = string("op_416_axis_0"), val = int32(-1)]; tensor var_416_cast_fp16_0, tensor var_416_cast_fp16_1 = split(axis = var_416_axis_0, split_sizes = var_416_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_416_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_418_cast_fp16 = mul(x = var_416_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_418_cast_fp16")]; int32 var_420 = const()[name = string("op_420"), val = int32(-1)]; bool var_421_interleave_0 = const()[name = string("op_421_interleave_0"), val = bool(false)]; tensor var_421_cast_fp16 = concat(axis = var_420, interleave = var_421_interleave_0, values = (var_418_cast_fp16, var_416_cast_fp16_0))[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = mul(x = var_421_cast_fp16, y = sin_cast_fp16)[name = string("op_422_cast_fp16")]; tensor key_states_3_cast_fp16 = add(x = var_415_cast_fp16, y = var_422_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_426 = add(x = position_id, y = q_len_1)[name = string("op_426")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_426, concat_4_values3_0))[name = string("concat_4")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_118_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_118")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_401_cast_fp16)[name = string("transpose_70")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_119_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_119")]; tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_48)[name = string("op_465_cast_fp16")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; int32 var_468_axis_0 = const()[name = string("op_468_axis_0"), val = int32(1)]; tensor var_468_cast_fp16_0, tensor var_468_cast_fp16_1 = split(axis = var_468_axis_0, split_sizes = tile_0, x = var_465_cast_fp16)[name = string("op_468_cast_fp16")]; tensor var_475_begin_0 = const()[name = string("op_475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_475_end_0 = const()[name = string("op_475_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_475_end_mask_0 = const()[name = string("op_475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_475_cast_fp16 = slice_by_index(begin = var_475_begin_0, end = var_475_end_0, end_mask = var_475_end_mask_0, x = coreml_update_state_49)[name = string("op_475_cast_fp16")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; int32 var_478_axis_0 = const()[name = string("op_478_axis_0"), val = int32(1)]; tensor var_478_cast_fp16_0, tensor var_478_cast_fp16_1 = split(axis = var_478_axis_0, split_sizes = tile_1, x = var_475_cast_fp16)[name = string("op_478_cast_fp16")]; tensor var_481_split_sizes_0 = const()[name = string("op_481_split_sizes_0"), val = tensor([8, 8])]; int32 var_481_axis_0 = const()[name = string("op_481_axis_0"), val = int32(1)]; tensor var_481_cast_fp16_0, tensor var_481_cast_fp16_1 = split(axis = var_481_axis_0, split_sizes = var_481_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_481_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_468_cast_fp16_0, y = var_481_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_72")]; tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; int32 var_488 = const()[name = string("op_488"), val = int32(2)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_488, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool var_494_transpose_x_1 = const()[name = string("op_494_transpose_x_1"), val = bool(true)]; bool var_494_transpose_y_1 = const()[name = string("op_494_transpose_y_1"), val = bool(false)]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_1, transpose_y = var_494_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_478_cast_fp16_0)[name = string("op_494_cast_fp16")]; bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_468_cast_fp16_1, y = var_481_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; int32 var_500 = const()[name = string("op_500"), val = int32(2)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_500, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_478_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; int32 var_508 = const()[name = string("op_508"), val = int32(1)]; bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; tensor attn_output_3_cast_fp16 = concat(axis = var_508, interleave = attn_output_3_interleave_0, values = (var_494_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; tensor var_512_perm_0 = const()[name = string("op_512_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 1024, 1, 128])]; tensor var_512_cast_fp16 = transpose(perm = var_512_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_69")]; tensor x_5_cast_fp16 = reshape(shape = var_517, x = var_512_cast_fp16)[name = string("x_5_cast_fp16")]; string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_524_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; int32 var_536 = const()[name = string("op_536"), val = int32(1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_539_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_539_cast_fp16")]; bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; tensor x_9_cast_fp16 = concat(axis = var_536, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_539_cast_fp16))[name = string("x_9_cast_fp16")]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_549_to_fp16 = const()[name = string("op_549_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_549_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(1)]; tensor var_555_cast_fp16_0, tensor var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = out_9_cast_fp16)[name = string("op_555_cast_fp16")]; string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_560_to_fp16, x = var_555_cast_fp16_0)[name = string("input_1_cast_fp16")]; tensor var_571_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_571_cast_fp16")]; string var_576_pad_type_0 = const()[name = string("op_576_pad_type_0"), val = string("valid")]; tensor var_576_strides_0 = const()[name = string("op_576_strides_0"), val = tensor([1, 1])]; tensor var_576_pad_0 = const()[name = string("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_dilations_0 = const()[name = string("op_576_dilations_0"), val = tensor([1, 1])]; int32 var_576_groups_0 = const()[name = string("op_576_groups_0"), val = int32(1)]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; tensor var_576_cast_fp16 = conv(dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = var_559_to_fp16, x = var_555_cast_fp16_0)[name = string("op_576_cast_fp16")]; tensor x_15_cast_fp16 = mul(x = var_571_cast_fp16, y = var_576_cast_fp16)[name = string("x_15_cast_fp16")]; string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; tensor var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_558_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_589 = const()[name = string("op_589"), val = int32(1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_592_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_592_cast_fp16")]; bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; tensor x_19_cast_fp16 = concat(axis = var_589, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_592_cast_fp16))[name = string("x_19_cast_fp16")]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_602_to_fp16 = const()[name = string("op_602_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_602_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_608_split_sizes_0 = const()[name = string("op_608_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_608_axis_0 = const()[name = string("op_608_axis_0"), val = int32(1)]; tensor var_608_cast_fp16_0, tensor var_608_cast_fp16_1 = split(axis = var_608_axis_0, split_sizes = var_608_split_sizes_0, x = out_15_cast_fp16)[name = string("op_608_cast_fp16")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_630_to_fp16, x = var_608_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_641_to_fp16, x = var_608_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_652_to_fp16, x = var_608_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 64, 128])]; tensor embed_5_cast_fp16 = reshape(shape = var_660, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 2, 64, 128])]; tensor var_665_cast_fp16 = reshape(shape = var_664, x = key_states_5_cast_fp16)[name = string("op_665_cast_fp16")]; tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 2, 64, 128])]; tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_states_5_cast_fp16)[name = string("op_672_cast_fp16")]; tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_676_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_677_split_sizes_0 = const()[name = string("op_677_split_sizes_0"), val = tensor([32, 32])]; int32 var_677_axis_0 = const()[name = string("op_677_axis_0"), val = int32(-2)]; tensor var_677_cast_fp16_0, tensor var_677_cast_fp16_1 = split(axis = var_677_axis_0, split_sizes = var_677_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_677_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_679_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-2)]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682_cast_fp16 = concat(axis = var_681, interleave = var_682_interleave_0, values = (var_679_cast_fp16, var_677_cast_fp16_0))[name = string("op_682_cast_fp16")]; tensor var_683_cast_fp16 = mul(x = var_682_cast_fp16, y = sin_1_cast_fp16)[name = string("op_683_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_676_cast_fp16, y = var_683_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_665_cast_fp16)[name = string("transpose_68")]; tensor var_686_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_686_cast_fp16")]; tensor var_687_split_sizes_0 = const()[name = string("op_687_split_sizes_0"), val = tensor([32, 32])]; int32 var_687_axis_0 = const()[name = string("op_687_axis_0"), val = int32(-1)]; tensor var_687_cast_fp16_0, tensor var_687_cast_fp16_1 = split(axis = var_687_axis_0, split_sizes = var_687_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_687_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_689_cast_fp16 = mul(x = var_687_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_689_cast_fp16")]; int32 var_691 = const()[name = string("op_691"), val = int32(-1)]; bool var_692_interleave_0 = const()[name = string("op_692_interleave_0"), val = bool(false)]; tensor var_692_cast_fp16 = concat(axis = var_691, interleave = var_692_interleave_0, values = (var_689_cast_fp16, var_687_cast_fp16_0))[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = mul(x = var_692_cast_fp16, y = sin_cast_fp16)[name = string("op_693_cast_fp16")]; tensor key_states_7_cast_fp16 = add(x = var_686_cast_fp16, y = var_693_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_426, concat_12_values3_0))[name = string("concat_12")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_120_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_120")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_672_cast_fp16)[name = string("transpose_67")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_121_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_121")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_50)[name = string("op_736_cast_fp16")]; tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; int32 var_739_axis_0 = const()[name = string("op_739_axis_0"), val = int32(1)]; tensor var_739_cast_fp16_0, tensor var_739_cast_fp16_1 = split(axis = var_739_axis_0, split_sizes = tile_2, x = var_736_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = coreml_update_state_51)[name = string("op_746_cast_fp16")]; tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = tile_3, x = var_746_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_752_split_sizes_0 = const()[name = string("op_752_split_sizes_0"), val = tensor([8, 8])]; int32 var_752_axis_0 = const()[name = string("op_752_axis_0"), val = int32(1)]; tensor var_752_cast_fp16_0, tensor var_752_cast_fp16_1 = split(axis = var_752_axis_0, split_sizes = var_752_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_752_cast_fp16")]; bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_739_cast_fp16_0, y = var_752_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; int32 var_759 = const()[name = string("op_759"), val = int32(2)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_759, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool var_765_transpose_x_1 = const()[name = string("op_765_transpose_x_1"), val = bool(true)]; bool var_765_transpose_y_1 = const()[name = string("op_765_transpose_y_1"), val = bool(false)]; tensor var_765_cast_fp16 = matmul(transpose_x = var_765_transpose_x_1, transpose_y = var_765_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_749_cast_fp16_0)[name = string("op_765_cast_fp16")]; bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_739_cast_fp16_1, y = var_752_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; int32 var_771 = const()[name = string("op_771"), val = int32(2)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_771, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_749_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; int32 var_779 = const()[name = string("op_779"), val = int32(1)]; bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = concat(axis = var_779, interleave = attn_output_9_interleave_0, values = (var_765_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; tensor var_783_perm_0 = const()[name = string("op_783_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1024, 1, 128])]; tensor var_783_cast_fp16 = transpose(perm = var_783_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_66")]; tensor x_23_cast_fp16 = reshape(shape = var_788, x = var_783_cast_fp16)[name = string("x_23_cast_fp16")]; string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_795_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_807 = const()[name = string("op_807"), val = int32(1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; tensor x_27_cast_fp16 = concat(axis = var_807, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_810_cast_fp16))[name = string("x_27_cast_fp16")]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_820_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_826_split_sizes_0 = const()[name = string("op_826_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_826_axis_0 = const()[name = string("op_826_axis_0"), val = int32(1)]; tensor var_826_cast_fp16_0, tensor var_826_cast_fp16_1 = split(axis = var_826_axis_0, split_sizes = var_826_split_sizes_0, x = out_21_cast_fp16)[name = string("op_826_cast_fp16")]; string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_831_to_fp16, x = var_826_cast_fp16_0)[name = string("input_3_cast_fp16")]; tensor var_842_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_842_cast_fp16")]; string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")]; tensor var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor([1, 1])]; tensor var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor([1, 1])]; int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)]; tensor var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; tensor var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = var_830_to_fp16, x = var_826_cast_fp16_0)[name = string("op_847_cast_fp16")]; tensor x_33_cast_fp16 = mul(x = var_842_cast_fp16, y = var_847_cast_fp16)[name = string("x_33_cast_fp16")]; string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_829_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; int32 var_860 = const()[name = string("op_860"), val = int32(1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_863_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_863_cast_fp16")]; bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; tensor x_37_cast_fp16 = concat(axis = var_860, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_863_cast_fp16))[name = string("x_37_cast_fp16")]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_873_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(1)]; tensor var_879_cast_fp16_0, tensor var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = out_27_cast_fp16)[name = string("op_879_cast_fp16")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_901_to_fp16, x = var_879_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_912_to_fp16, x = var_879_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_923_to_fp16, x = var_879_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 16, 64, 128])]; tensor embed_9_cast_fp16 = reshape(shape = var_931, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; tensor var_935 = const()[name = string("op_935"), val = tensor([1, 2, 64, 128])]; tensor var_936_cast_fp16 = reshape(shape = var_935, x = key_states_9_cast_fp16)[name = string("op_936_cast_fp16")]; tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 2, 64, 128])]; tensor var_943_cast_fp16 = reshape(shape = var_942, x = value_states_9_cast_fp16)[name = string("op_943_cast_fp16")]; tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_947_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_947_cast_fp16")]; tensor var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor([32, 32])]; int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-2)]; tensor var_948_cast_fp16_0, tensor var_948_cast_fp16_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_948_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_950_cast_fp16 = mul(x = var_948_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_950_cast_fp16")]; int32 var_952 = const()[name = string("op_952"), val = int32(-2)]; bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)]; tensor var_953_cast_fp16 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950_cast_fp16, var_948_cast_fp16_0))[name = string("op_953_cast_fp16")]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = sin_1_cast_fp16)[name = string("op_954_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_936_cast_fp16)[name = string("transpose_65")]; tensor var_957_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_957_cast_fp16")]; tensor var_958_split_sizes_0 = const()[name = string("op_958_split_sizes_0"), val = tensor([32, 32])]; int32 var_958_axis_0 = const()[name = string("op_958_axis_0"), val = int32(-1)]; tensor var_958_cast_fp16_0, tensor var_958_cast_fp16_1 = split(axis = var_958_axis_0, split_sizes = var_958_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_958_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = var_958_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool var_963_interleave_0 = const()[name = string("op_963_interleave_0"), val = bool(false)]; tensor var_963_cast_fp16 = concat(axis = var_962, interleave = var_963_interleave_0, values = (var_960_cast_fp16, var_958_cast_fp16_0))[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = var_963_cast_fp16, y = sin_cast_fp16)[name = string("op_964_cast_fp16")]; tensor key_states_11_cast_fp16 = add(x = var_957_cast_fp16, y = var_964_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_426, concat_20_values3_0))[name = string("concat_20")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_122_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_122")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_943_cast_fp16)[name = string("transpose_64")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_123_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_123")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_52)[name = string("op_1007_cast_fp16")]; tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; int32 var_1010_axis_0 = const()[name = string("op_1010_axis_0"), val = int32(1)]; tensor var_1010_cast_fp16_0, tensor var_1010_cast_fp16_1 = split(axis = var_1010_axis_0, split_sizes = tile_4, x = var_1007_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = string("op_1017_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1017_end_0 = const()[name = string("op_1017_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1017_end_mask_0 = const()[name = string("op_1017_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = coreml_update_state_53)[name = string("op_1017_cast_fp16")]; tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; int32 var_1020_axis_0 = const()[name = string("op_1020_axis_0"), val = int32(1)]; tensor var_1020_cast_fp16_0, tensor var_1020_cast_fp16_1 = split(axis = var_1020_axis_0, split_sizes = tile_5, x = var_1017_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1023_split_sizes_0 = const()[name = string("op_1023_split_sizes_0"), val = tensor([8, 8])]; int32 var_1023_axis_0 = const()[name = string("op_1023_axis_0"), val = int32(1)]; tensor var_1023_cast_fp16_0, tensor var_1023_cast_fp16_1 = split(axis = var_1023_axis_0, split_sizes = var_1023_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_1023_cast_fp16")]; bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_1010_cast_fp16_0, y = var_1023_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; int32 var_1030 = const()[name = string("op_1030"), val = int32(2)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_1030, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool var_1036_transpose_x_1 = const()[name = string("op_1036_transpose_x_1"), val = bool(true)]; bool var_1036_transpose_y_1 = const()[name = string("op_1036_transpose_y_1"), val = bool(false)]; tensor var_1036_cast_fp16 = matmul(transpose_x = var_1036_transpose_x_1, transpose_y = var_1036_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_1020_cast_fp16_0)[name = string("op_1036_cast_fp16")]; bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_1010_cast_fp16_1, y = var_1023_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; int32 var_1042 = const()[name = string("op_1042"), val = int32(2)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_1042, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_1020_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; int32 var_1050 = const()[name = string("op_1050"), val = int32(1)]; bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; tensor attn_output_15_cast_fp16 = concat(axis = var_1050, interleave = attn_output_15_interleave_0, values = (var_1036_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; tensor var_1054_perm_0 = const()[name = string("op_1054_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1059 = const()[name = string("op_1059"), val = tensor([1, 1024, 1, 128])]; tensor var_1054_cast_fp16 = transpose(perm = var_1054_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_63")]; tensor x_41_cast_fp16 = reshape(shape = var_1059, x = var_1054_cast_fp16)[name = string("x_41_cast_fp16")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_1066_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_1078 = const()[name = string("op_1078"), val = int32(1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1081_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_1081_cast_fp16")]; bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; tensor x_45_cast_fp16 = concat(axis = var_1078, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_1081_cast_fp16))[name = string("x_45_cast_fp16")]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1091_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(1)]; tensor var_1097_cast_fp16_0, tensor var_1097_cast_fp16_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = out_33_cast_fp16)[name = string("op_1097_cast_fp16")]; string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_1102_to_fp16, x = var_1097_cast_fp16_0)[name = string("input_5_cast_fp16")]; tensor var_1113_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_1113_cast_fp16")]; string var_1118_pad_type_0 = const()[name = string("op_1118_pad_type_0"), val = string("valid")]; tensor var_1118_strides_0 = const()[name = string("op_1118_strides_0"), val = tensor([1, 1])]; tensor var_1118_pad_0 = const()[name = string("op_1118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1118_dilations_0 = const()[name = string("op_1118_dilations_0"), val = tensor([1, 1])]; int32 var_1118_groups_0 = const()[name = string("op_1118_groups_0"), val = int32(1)]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; tensor var_1118_cast_fp16 = conv(dilations = var_1118_dilations_0, groups = var_1118_groups_0, pad = var_1118_pad_0, pad_type = var_1118_pad_type_0, strides = var_1118_strides_0, weight = var_1101_to_fp16, x = var_1097_cast_fp16_0)[name = string("op_1118_cast_fp16")]; tensor x_51_cast_fp16 = mul(x = var_1113_cast_fp16, y = var_1118_cast_fp16)[name = string("x_51_cast_fp16")]; string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; tensor var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_1100_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(1)]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1134_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1134_cast_fp16")]; bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; tensor x_55_cast_fp16 = concat(axis = var_1131, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_1134_cast_fp16))[name = string("x_55_cast_fp16")]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1144_to_fp16 = const()[name = string("op_1144_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1144_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_1150_split_sizes_0 = const()[name = string("op_1150_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1150_axis_0 = const()[name = string("op_1150_axis_0"), val = int32(1)]; tensor var_1150_cast_fp16_0, tensor var_1150_cast_fp16_1 = split(axis = var_1150_axis_0, split_sizes = var_1150_split_sizes_0, x = out_39_cast_fp16)[name = string("op_1150_cast_fp16")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_1172_to_fp16, x = var_1150_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1183_to_fp16, x = var_1150_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1194_to_fp16, x = var_1150_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 16, 64, 128])]; tensor embed_13_cast_fp16 = reshape(shape = var_1202, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 2, 64, 128])]; tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = key_states_13_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, 2, 64, 128])]; tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = value_states_13_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1218_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1218_cast_fp16")]; tensor var_1219_split_sizes_0 = const()[name = string("op_1219_split_sizes_0"), val = tensor([32, 32])]; int32 var_1219_axis_0 = const()[name = string("op_1219_axis_0"), val = int32(-2)]; tensor var_1219_cast_fp16_0, tensor var_1219_cast_fp16_1 = split(axis = var_1219_axis_0, split_sizes = var_1219_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1219_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1221_cast_fp16 = mul(x = var_1219_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1221_cast_fp16")]; int32 var_1223 = const()[name = string("op_1223"), val = int32(-2)]; bool var_1224_interleave_0 = const()[name = string("op_1224_interleave_0"), val = bool(false)]; tensor var_1224_cast_fp16 = concat(axis = var_1223, interleave = var_1224_interleave_0, values = (var_1221_cast_fp16, var_1219_cast_fp16_0))[name = string("op_1224_cast_fp16")]; tensor var_1225_cast_fp16 = mul(x = var_1224_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1225_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1207_cast_fp16)[name = string("transpose_62")]; tensor var_1228_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor var_1229_split_sizes_0 = const()[name = string("op_1229_split_sizes_0"), val = tensor([32, 32])]; int32 var_1229_axis_0 = const()[name = string("op_1229_axis_0"), val = int32(-1)]; tensor var_1229_cast_fp16_0, tensor var_1229_cast_fp16_1 = split(axis = var_1229_axis_0, split_sizes = var_1229_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1229_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1231_cast_fp16 = mul(x = var_1229_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1231_cast_fp16")]; int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; bool var_1234_interleave_0 = const()[name = string("op_1234_interleave_0"), val = bool(false)]; tensor var_1234_cast_fp16 = concat(axis = var_1233, interleave = var_1234_interleave_0, values = (var_1231_cast_fp16, var_1229_cast_fp16_0))[name = string("op_1234_cast_fp16")]; tensor var_1235_cast_fp16 = mul(x = var_1234_cast_fp16, y = sin_cast_fp16)[name = string("op_1235_cast_fp16")]; tensor key_states_15_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1235_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_426, concat_28_values3_0))[name = string("concat_28")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_124_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_124")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1214_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_125_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_125")]; tensor var_1278_begin_0 = const()[name = string("op_1278_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1278_end_0 = const()[name = string("op_1278_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1278_end_mask_0 = const()[name = string("op_1278_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1278_cast_fp16 = slice_by_index(begin = var_1278_begin_0, end = var_1278_end_0, end_mask = var_1278_end_mask_0, x = coreml_update_state_54)[name = string("op_1278_cast_fp16")]; tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; int32 var_1281_axis_0 = const()[name = string("op_1281_axis_0"), val = int32(1)]; tensor var_1281_cast_fp16_0, tensor var_1281_cast_fp16_1 = split(axis = var_1281_axis_0, split_sizes = tile_6, x = var_1278_cast_fp16)[name = string("op_1281_cast_fp16")]; tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = coreml_update_state_55)[name = string("op_1288_cast_fp16")]; tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; int32 var_1291_axis_0 = const()[name = string("op_1291_axis_0"), val = int32(1)]; tensor var_1291_cast_fp16_0, tensor var_1291_cast_fp16_1 = split(axis = var_1291_axis_0, split_sizes = tile_7, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_split_sizes_0 = const()[name = string("op_1294_split_sizes_0"), val = tensor([8, 8])]; int32 var_1294_axis_0 = const()[name = string("op_1294_axis_0"), val = int32(1)]; tensor var_1294_cast_fp16_0, tensor var_1294_cast_fp16_1 = split(axis = var_1294_axis_0, split_sizes = var_1294_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1294_cast_fp16")]; bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1281_cast_fp16_0, y = var_1294_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; int32 var_1301 = const()[name = string("op_1301"), val = int32(2)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_1301, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool var_1307_transpose_x_1 = const()[name = string("op_1307_transpose_x_1"), val = bool(true)]; bool var_1307_transpose_y_1 = const()[name = string("op_1307_transpose_y_1"), val = bool(false)]; tensor var_1307_cast_fp16 = matmul(transpose_x = var_1307_transpose_x_1, transpose_y = var_1307_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1291_cast_fp16_0)[name = string("op_1307_cast_fp16")]; bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1281_cast_fp16_1, y = var_1294_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; int32 var_1313 = const()[name = string("op_1313"), val = int32(2)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_1313, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1291_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; int32 var_1321 = const()[name = string("op_1321"), val = int32(1)]; bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = concat(axis = var_1321, interleave = attn_output_21_interleave_0, values = (var_1307_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; tensor var_1325_perm_0 = const()[name = string("op_1325_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 1024, 1, 128])]; tensor var_1325_cast_fp16 = transpose(perm = var_1325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_60")]; tensor x_59_cast_fp16 = reshape(shape = var_1330, x = var_1325_cast_fp16)[name = string("x_59_cast_fp16")]; string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1337_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; int32 var_1349 = const()[name = string("op_1349"), val = int32(1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1352_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1352_cast_fp16")]; bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; tensor x_63_cast_fp16 = concat(axis = var_1349, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1352_cast_fp16))[name = string("x_63_cast_fp16")]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1362_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1368_split_sizes_0 = const()[name = string("op_1368_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1368_axis_0 = const()[name = string("op_1368_axis_0"), val = int32(1)]; tensor var_1368_cast_fp16_0, tensor var_1368_cast_fp16_1 = split(axis = var_1368_axis_0, split_sizes = var_1368_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1368_cast_fp16")]; string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; tensor var_1373_to_fp16 = const()[name = string("op_1373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1373_to_fp16, x = var_1368_cast_fp16_0)[name = string("input_7_cast_fp16")]; tensor var_1384_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1384_cast_fp16")]; string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")]; tensor var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor([1, 1])]; tensor var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor([1, 1])]; int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)]; tensor var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; tensor var_1389_cast_fp16 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = var_1372_to_fp16, x = var_1368_cast_fp16_0)[name = string("op_1389_cast_fp16")]; tensor x_69_cast_fp16 = mul(x = var_1384_cast_fp16, y = var_1389_cast_fp16)[name = string("x_69_cast_fp16")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor var_1371_to_fp16 = const()[name = string("op_1371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1371_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; int32 var_1402 = const()[name = string("op_1402"), val = int32(1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1405_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1405_cast_fp16")]; bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; tensor x_73_cast_fp16 = concat(axis = var_1402, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1405_cast_fp16))[name = string("x_73_cast_fp16")]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1415_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_1421_split_sizes_0 = const()[name = string("op_1421_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1421_axis_0 = const()[name = string("op_1421_axis_0"), val = int32(1)]; tensor var_1421_cast_fp16_0, tensor var_1421_cast_fp16_1 = split(axis = var_1421_axis_0, split_sizes = var_1421_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1421_cast_fp16")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1443_to_fp16, x = var_1421_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; tensor var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1454_to_fp16, x = var_1421_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1465_to_fp16, x = var_1421_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; tensor var_1473 = const()[name = string("op_1473"), val = tensor([1, 16, 64, 128])]; tensor embed_17_cast_fp16 = reshape(shape = var_1473, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; tensor var_1477 = const()[name = string("op_1477"), val = tensor([1, 2, 64, 128])]; tensor var_1478_cast_fp16 = reshape(shape = var_1477, x = key_states_17_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1484 = const()[name = string("op_1484"), val = tensor([1, 2, 64, 128])]; tensor var_1485_cast_fp16 = reshape(shape = var_1484, x = value_states_17_cast_fp16)[name = string("op_1485_cast_fp16")]; tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1489_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor var_1490_split_sizes_0 = const()[name = string("op_1490_split_sizes_0"), val = tensor([32, 32])]; int32 var_1490_axis_0 = const()[name = string("op_1490_axis_0"), val = int32(-2)]; tensor var_1490_cast_fp16_0, tensor var_1490_cast_fp16_1 = split(axis = var_1490_axis_0, split_sizes = var_1490_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1490_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1492_cast_fp16 = mul(x = var_1490_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-2)]; bool var_1495_interleave_0 = const()[name = string("op_1495_interleave_0"), val = bool(false)]; tensor var_1495_cast_fp16 = concat(axis = var_1494, interleave = var_1495_interleave_0, values = (var_1492_cast_fp16, var_1490_cast_fp16_0))[name = string("op_1495_cast_fp16")]; tensor var_1496_cast_fp16 = mul(x = var_1495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1496_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1478_cast_fp16)[name = string("transpose_59")]; tensor var_1499_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_split_sizes_0 = const()[name = string("op_1500_split_sizes_0"), val = tensor([32, 32])]; int32 var_1500_axis_0 = const()[name = string("op_1500_axis_0"), val = int32(-1)]; tensor var_1500_cast_fp16_0, tensor var_1500_cast_fp16_1 = split(axis = var_1500_axis_0, split_sizes = var_1500_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1500_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1502_cast_fp16 = mul(x = var_1500_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1502_cast_fp16")]; int32 var_1504 = const()[name = string("op_1504"), val = int32(-1)]; bool var_1505_interleave_0 = const()[name = string("op_1505_interleave_0"), val = bool(false)]; tensor var_1505_cast_fp16 = concat(axis = var_1504, interleave = var_1505_interleave_0, values = (var_1502_cast_fp16, var_1500_cast_fp16_0))[name = string("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = mul(x = var_1505_cast_fp16, y = sin_cast_fp16)[name = string("op_1506_cast_fp16")]; tensor key_states_19_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1506_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_426, concat_36_values3_0))[name = string("concat_36")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_126_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_126")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1485_cast_fp16)[name = string("transpose_58")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_127_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_127")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1549_end_0 = const()[name = string("op_1549_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = coreml_update_state_56)[name = string("op_1549_cast_fp16")]; tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; int32 var_1552_axis_0 = const()[name = string("op_1552_axis_0"), val = int32(1)]; tensor var_1552_cast_fp16_0, tensor var_1552_cast_fp16_1 = split(axis = var_1552_axis_0, split_sizes = tile_8, x = var_1549_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = string("op_1559_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, x = coreml_update_state_57)[name = string("op_1559_cast_fp16")]; tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(1)]; tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = tile_9, x = var_1559_cast_fp16)[name = string("op_1562_cast_fp16")]; tensor var_1565_split_sizes_0 = const()[name = string("op_1565_split_sizes_0"), val = tensor([8, 8])]; int32 var_1565_axis_0 = const()[name = string("op_1565_axis_0"), val = int32(1)]; tensor var_1565_cast_fp16_0, tensor var_1565_cast_fp16_1 = split(axis = var_1565_axis_0, split_sizes = var_1565_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1565_cast_fp16")]; bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1552_cast_fp16_0, y = var_1565_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; int32 var_1572 = const()[name = string("op_1572"), val = int32(2)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_1572, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(true)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(false)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1562_cast_fp16_0)[name = string("op_1578_cast_fp16")]; bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1552_cast_fp16_1, y = var_1565_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; int32 var_1584 = const()[name = string("op_1584"), val = int32(2)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_1584, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1562_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; int32 var_1592 = const()[name = string("op_1592"), val = int32(1)]; bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; tensor attn_output_27_cast_fp16 = concat(axis = var_1592, interleave = attn_output_27_interleave_0, values = (var_1578_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1024, 1, 128])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_57")]; tensor x_77_cast_fp16 = reshape(shape = var_1601, x = var_1596_cast_fp16)[name = string("x_77_cast_fp16")]; string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1608_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; int32 var_1620 = const()[name = string("op_1620"), val = int32(1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1623_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1623_cast_fp16")]; bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; tensor x_81_cast_fp16 = concat(axis = var_1620, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1623_cast_fp16))[name = string("x_81_cast_fp16")]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1633_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1639_split_sizes_0 = const()[name = string("op_1639_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1639_axis_0 = const()[name = string("op_1639_axis_0"), val = int32(1)]; tensor var_1639_cast_fp16_0, tensor var_1639_cast_fp16_1 = split(axis = var_1639_axis_0, split_sizes = var_1639_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1639_cast_fp16")]; string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1644_to_fp16, x = var_1639_cast_fp16_0)[name = string("input_9_cast_fp16")]; tensor var_1655_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1655_cast_fp16")]; string var_1660_pad_type_0 = const()[name = string("op_1660_pad_type_0"), val = string("valid")]; tensor var_1660_strides_0 = const()[name = string("op_1660_strides_0"), val = tensor([1, 1])]; tensor var_1660_pad_0 = const()[name = string("op_1660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1660_dilations_0 = const()[name = string("op_1660_dilations_0"), val = tensor([1, 1])]; int32 var_1660_groups_0 = const()[name = string("op_1660_groups_0"), val = int32(1)]; tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; tensor var_1660_cast_fp16 = conv(dilations = var_1660_dilations_0, groups = var_1660_groups_0, pad = var_1660_pad_0, pad_type = var_1660_pad_type_0, strides = var_1660_strides_0, weight = var_1643_to_fp16, x = var_1639_cast_fp16_0)[name = string("op_1660_cast_fp16")]; tensor x_87_cast_fp16 = mul(x = var_1655_cast_fp16, y = var_1660_cast_fp16)[name = string("x_87_cast_fp16")]; string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; tensor var_1642_to_fp16 = const()[name = string("op_1642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1642_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; int32 var_1673 = const()[name = string("op_1673"), val = int32(1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1676_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1676_cast_fp16")]; bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; tensor x_91_cast_fp16 = concat(axis = var_1673, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1676_cast_fp16))[name = string("x_91_cast_fp16")]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1686_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_1692_split_sizes_0 = const()[name = string("op_1692_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1692_axis_0 = const()[name = string("op_1692_axis_0"), val = int32(1)]; tensor var_1692_cast_fp16_0, tensor var_1692_cast_fp16_1 = split(axis = var_1692_axis_0, split_sizes = var_1692_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1692_cast_fp16")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor var_1714_to_fp16 = const()[name = string("op_1714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1714_to_fp16, x = var_1692_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1725_to_fp16, x = var_1692_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; tensor var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1736_to_fp16, x = var_1692_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; tensor var_1744 = const()[name = string("op_1744"), val = tensor([1, 16, 64, 128])]; tensor embed_21_cast_fp16 = reshape(shape = var_1744, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 2, 64, 128])]; tensor var_1749_cast_fp16 = reshape(shape = var_1748, x = key_states_21_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor embed_23_perm_0 = const()[name = string("embed_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 2, 64, 128])]; tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_states_21_cast_fp16)[name = string("op_1756_cast_fp16")]; tensor value_states_23_perm_0 = const()[name = string("value_states_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1760_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([32, 32])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-2)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1761_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1763_cast_fp16 = mul(x = var_1761_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1763_cast_fp16")]; int32 var_1765 = const()[name = string("op_1765"), val = int32(-2)]; bool var_1766_interleave_0 = const()[name = string("op_1766_interleave_0"), val = bool(false)]; tensor var_1766_cast_fp16 = concat(axis = var_1765, interleave = var_1766_interleave_0, values = (var_1763_cast_fp16, var_1761_cast_fp16_0))[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = var_1766_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1760_cast_fp16, y = var_1767_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor embed_23_cast_fp16 = transpose(perm = embed_23_perm_0, x = var_1749_cast_fp16)[name = string("transpose_56")]; tensor var_1770_cast_fp16 = mul(x = embed_23_cast_fp16, y = cos_cast_fp16)[name = string("op_1770_cast_fp16")]; tensor var_1771_split_sizes_0 = const()[name = string("op_1771_split_sizes_0"), val = tensor([32, 32])]; int32 var_1771_axis_0 = const()[name = string("op_1771_axis_0"), val = int32(-1)]; tensor var_1771_cast_fp16_0, tensor var_1771_cast_fp16_1 = split(axis = var_1771_axis_0, split_sizes = var_1771_split_sizes_0, x = embed_23_cast_fp16)[name = string("op_1771_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1773_cast_fp16 = mul(x = var_1771_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1773_cast_fp16")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool var_1776_interleave_0 = const()[name = string("op_1776_interleave_0"), val = bool(false)]; tensor var_1776_cast_fp16 = concat(axis = var_1775, interleave = var_1776_interleave_0, values = (var_1773_cast_fp16, var_1771_cast_fp16_0))[name = string("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = mul(x = var_1776_cast_fp16, y = sin_cast_fp16)[name = string("op_1777_cast_fp16")]; tensor key_states_23_cast_fp16 = add(x = var_1770_cast_fp16, y = var_1777_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_426, concat_44_values3_0))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_128_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_128")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_23_cast_fp16 = transpose(perm = value_states_23_perm_0, x = var_1756_cast_fp16)[name = string("transpose_55")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_23_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_129_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_129")]; tensor var_1820_begin_0 = const()[name = string("op_1820_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1820_end_0 = const()[name = string("op_1820_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1820_end_mask_0 = const()[name = string("op_1820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1820_cast_fp16 = slice_by_index(begin = var_1820_begin_0, end = var_1820_end_0, end_mask = var_1820_end_mask_0, x = coreml_update_state_58)[name = string("op_1820_cast_fp16")]; tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; int32 var_1823_axis_0 = const()[name = string("op_1823_axis_0"), val = int32(1)]; tensor var_1823_cast_fp16_0, tensor var_1823_cast_fp16_1 = split(axis = var_1823_axis_0, split_sizes = tile_10, x = var_1820_cast_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_59)[name = string("op_1830_cast_fp16")]; tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; int32 var_1833_axis_0 = const()[name = string("op_1833_axis_0"), val = int32(1)]; tensor var_1833_cast_fp16_0, tensor var_1833_cast_fp16_1 = split(axis = var_1833_axis_0, split_sizes = tile_11, x = var_1830_cast_fp16)[name = string("op_1833_cast_fp16")]; tensor var_1836_split_sizes_0 = const()[name = string("op_1836_split_sizes_0"), val = tensor([8, 8])]; int32 var_1836_axis_0 = const()[name = string("op_1836_axis_0"), val = int32(1)]; tensor var_1836_cast_fp16_0, tensor var_1836_cast_fp16_1 = split(axis = var_1836_axis_0, split_sizes = var_1836_split_sizes_0, x = query_states_23_cast_fp16)[name = string("op_1836_cast_fp16")]; bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1823_cast_fp16_0, y = var_1836_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; int32 var_1843 = const()[name = string("op_1843"), val = int32(2)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_1843, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool var_1849_transpose_x_1 = const()[name = string("op_1849_transpose_x_1"), val = bool(true)]; bool var_1849_transpose_y_1 = const()[name = string("op_1849_transpose_y_1"), val = bool(false)]; tensor var_1849_cast_fp16 = matmul(transpose_x = var_1849_transpose_x_1, transpose_y = var_1849_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1833_cast_fp16_0)[name = string("op_1849_cast_fp16")]; bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1823_cast_fp16_1, y = var_1836_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; int32 var_1855 = const()[name = string("op_1855"), val = int32(2)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_1855, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_95_cast_fp16, y = var_1833_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; int32 var_1863 = const()[name = string("op_1863"), val = int32(1)]; bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = concat(axis = var_1863, interleave = attn_output_33_interleave_0, values = (var_1849_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; tensor var_1867_perm_0 = const()[name = string("op_1867_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, 1024, 1, 128])]; tensor var_1867_cast_fp16 = transpose(perm = var_1867_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_54")]; tensor x_95_cast_fp16 = reshape(shape = var_1872, x = var_1867_cast_fp16)[name = string("x_95_cast_fp16")]; string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; tensor var_1879_to_fp16 = const()[name = string("op_1879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1879_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_1891 = const()[name = string("op_1891"), val = int32(1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; tensor x_99_cast_fp16 = concat(axis = var_1891, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1894_cast_fp16))[name = string("x_99_cast_fp16")]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1904_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1910_split_sizes_0 = const()[name = string("op_1910_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1910_axis_0 = const()[name = string("op_1910_axis_0"), val = int32(1)]; tensor var_1910_cast_fp16_0, tensor var_1910_cast_fp16_1 = split(axis = var_1910_axis_0, split_sizes = var_1910_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1910_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; tensor input_11_cast_fp16 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = var_1915_to_fp16, x = var_1910_cast_fp16_0)[name = string("input_11_cast_fp16")]; tensor var_1926_cast_fp16 = silu(x = input_11_cast_fp16)[name = string("op_1926_cast_fp16")]; string var_1931_pad_type_0 = const()[name = string("op_1931_pad_type_0"), val = string("valid")]; tensor var_1931_strides_0 = const()[name = string("op_1931_strides_0"), val = tensor([1, 1])]; tensor var_1931_pad_0 = const()[name = string("op_1931_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1931_dilations_0 = const()[name = string("op_1931_dilations_0"), val = tensor([1, 1])]; int32 var_1931_groups_0 = const()[name = string("op_1931_groups_0"), val = int32(1)]; tensor var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; tensor var_1931_cast_fp16 = conv(dilations = var_1931_dilations_0, groups = var_1931_groups_0, pad = var_1931_pad_0, pad_type = var_1931_pad_type_0, strides = var_1931_strides_0, weight = var_1914_to_fp16, x = var_1910_cast_fp16_0)[name = string("op_1931_cast_fp16")]; tensor x_105_cast_fp16 = mul(x = var_1926_cast_fp16, y = var_1931_cast_fp16)[name = string("x_105_cast_fp16")]; string hidden_states_35_pad_type_0 = const()[name = string("hidden_states_35_pad_type_0"), val = string("valid")]; tensor hidden_states_35_strides_0 = const()[name = string("hidden_states_35_strides_0"), val = tensor([1, 1])]; tensor hidden_states_35_pad_0 = const()[name = string("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_35_dilations_0 = const()[name = string("hidden_states_35_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_35_groups_0 = const()[name = string("hidden_states_35_groups_0"), val = int32(1)]; tensor var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; tensor hidden_states_35_cast_fp16 = conv(dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = var_1913_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1947_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1947_cast_fp16")]; bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; tensor x_109_cast_fp16 = concat(axis = var_1944, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1947_cast_fp16))[name = string("x_109_cast_fp16")]; tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; fp16 var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1957_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; tensor layer_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_layers_6_input_layernorm_weight_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_1963_split_sizes_0 = const()[name = string("op_1963_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1963_axis_0 = const()[name = string("op_1963_axis_0"), val = int32(1)]; tensor var_1963_cast_fp16_0, tensor var_1963_cast_fp16_1 = split(axis = var_1963_axis_0, split_sizes = var_1963_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1963_cast_fp16")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; tensor query_states_25_cast_fp16 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = var_1985_to_fp16, x = var_1963_cast_fp16_0)[name = string("query_states_25_cast_fp16")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor var_1996_to_fp16 = const()[name = string("op_1996_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189861696)))]; tensor key_states_25_cast_fp16 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = var_1996_to_fp16, x = var_1963_cast_fp16_0)[name = string("key_states_25_cast_fp16")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190123904)))]; tensor value_states_25_cast_fp16 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = var_2007_to_fp16, x = var_1963_cast_fp16_0)[name = string("value_states_25_cast_fp16")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 16, 64, 128])]; tensor embed_25_cast_fp16 = reshape(shape = var_2015, x = query_states_25_cast_fp16)[name = string("embed_25_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 2, 64, 128])]; tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = key_states_25_cast_fp16)[name = string("op_2020_cast_fp16")]; tensor embed_27_perm_0 = const()[name = string("embed_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2026 = const()[name = string("op_2026"), val = tensor([1, 2, 64, 128])]; tensor var_2027_cast_fp16 = reshape(shape = var_2026, x = value_states_25_cast_fp16)[name = string("op_2027_cast_fp16")]; tensor value_states_27_perm_0 = const()[name = string("value_states_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2031_cast_fp16 = mul(x = embed_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_split_sizes_0 = const()[name = string("op_2032_split_sizes_0"), val = tensor([32, 32])]; int32 var_2032_axis_0 = const()[name = string("op_2032_axis_0"), val = int32(-2)]; tensor var_2032_cast_fp16_0, tensor var_2032_cast_fp16_1 = split(axis = var_2032_axis_0, split_sizes = var_2032_split_sizes_0, x = embed_25_cast_fp16)[name = string("op_2032_cast_fp16")]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2034_cast_fp16 = mul(x = var_2032_cast_fp16_1, y = const_67_promoted_to_fp16)[name = string("op_2034_cast_fp16")]; int32 var_2036 = const()[name = string("op_2036"), val = int32(-2)]; bool var_2037_interleave_0 = const()[name = string("op_2037_interleave_0"), val = bool(false)]; tensor var_2037_cast_fp16 = concat(axis = var_2036, interleave = var_2037_interleave_0, values = (var_2034_cast_fp16, var_2032_cast_fp16_0))[name = string("op_2037_cast_fp16")]; tensor var_2038_cast_fp16 = mul(x = var_2037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2038_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor embed_27_cast_fp16 = transpose(perm = embed_27_perm_0, x = var_2020_cast_fp16)[name = string("transpose_53")]; tensor var_2041_cast_fp16 = mul(x = embed_27_cast_fp16, y = cos_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2042_split_sizes_0 = const()[name = string("op_2042_split_sizes_0"), val = tensor([32, 32])]; int32 var_2042_axis_0 = const()[name = string("op_2042_axis_0"), val = int32(-1)]; tensor var_2042_cast_fp16_0, tensor var_2042_cast_fp16_1 = split(axis = var_2042_axis_0, split_sizes = var_2042_split_sizes_0, x = embed_27_cast_fp16)[name = string("op_2042_cast_fp16")]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = var_2042_cast_fp16_1, y = const_68_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; bool var_2047_interleave_0 = const()[name = string("op_2047_interleave_0"), val = bool(false)]; tensor var_2047_cast_fp16 = concat(axis = var_2046, interleave = var_2047_interleave_0, values = (var_2044_cast_fp16, var_2042_cast_fp16_0))[name = string("op_2047_cast_fp16")]; tensor var_2048_cast_fp16 = mul(x = var_2047_cast_fp16, y = sin_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor key_states_27_cast_fp16 = add(x = var_2041_cast_fp16, y = var_2048_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([6])]; tensor expand_dims_62 = const()[name = string("expand_dims_62"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([7])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_61, expand_dims_62, position_id, concat_51_values3_0))[name = string("concat_51")]; tensor concat_52_values1_0 = const()[name = string("concat_52_values1_0"), val = tensor([0])]; tensor concat_52_values3_0 = const()[name = string("concat_52_values3_0"), val = tensor([0])]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (expand_dims_64, concat_52_values1_0, var_426, concat_52_values3_0))[name = string("concat_52")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = key_states_27_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_130_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_130")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27_cast_fp16 = transpose(perm = value_states_27_perm_0, x = var_2027_cast_fp16)[name = string("transpose_52")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = value_states_27_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_131_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_131")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = coreml_update_state_60)[name = string("op_2091_cast_fp16")]; tensor tile_12 = const()[name = string("tile_12"), val = tensor([1, 1])]; int32 var_2094_axis_0 = const()[name = string("op_2094_axis_0"), val = int32(1)]; tensor var_2094_cast_fp16_0, tensor var_2094_cast_fp16_1 = split(axis = var_2094_axis_0, split_sizes = tile_12, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = string("op_2101_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = string("op_2101_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2101_end_mask_0 = const()[name = string("op_2101_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = coreml_update_state_61)[name = string("op_2101_cast_fp16")]; tensor tile_13 = const()[name = string("tile_13"), val = tensor([1, 1])]; int32 var_2104_axis_0 = const()[name = string("op_2104_axis_0"), val = int32(1)]; tensor var_2104_cast_fp16_0, tensor var_2104_cast_fp16_1 = split(axis = var_2104_axis_0, split_sizes = tile_13, x = var_2101_cast_fp16)[name = string("op_2104_cast_fp16")]; tensor var_2107_split_sizes_0 = const()[name = string("op_2107_split_sizes_0"), val = tensor([8, 8])]; int32 var_2107_axis_0 = const()[name = string("op_2107_axis_0"), val = int32(1)]; tensor var_2107_cast_fp16_0, tensor var_2107_cast_fp16_1 = split(axis = var_2107_axis_0, split_sizes = var_2107_split_sizes_0, x = query_states_27_cast_fp16)[name = string("op_2107_cast_fp16")]; bool attn_weights_97_transpose_x_0 = const()[name = string("attn_weights_97_transpose_x_0"), val = bool(false)]; bool attn_weights_97_transpose_y_0 = const()[name = string("attn_weights_97_transpose_y_0"), val = bool(false)]; tensor attn_weights_97_cast_fp16 = matmul(transpose_x = attn_weights_97_transpose_x_0, transpose_y = attn_weights_97_transpose_y_0, x = var_2094_cast_fp16_0, y = var_2107_cast_fp16_0)[name = string("attn_weights_97_cast_fp16")]; fp16 _inversed_attn_weights_99_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_99_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_99_cast_fp16 = mul(x = attn_weights_97_cast_fp16, y = _inversed_attn_weights_99_y_0_to_fp16)[name = string("_inversed_attn_weights_99_cast_fp16")]; tensor attn_weights_101_cast_fp16 = add(x = _inversed_attn_weights_99_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; int32 var_2114 = const()[name = string("op_2114"), val = int32(2)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_2114, x = attn_weights_101_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool var_2120_transpose_x_1 = const()[name = string("op_2120_transpose_x_1"), val = bool(true)]; bool var_2120_transpose_y_1 = const()[name = string("op_2120_transpose_y_1"), val = bool(false)]; tensor var_2120_cast_fp16 = matmul(transpose_x = var_2120_transpose_x_1, transpose_y = var_2120_transpose_y_1, x = attn_weights_103_cast_fp16, y = var_2104_cast_fp16_0)[name = string("op_2120_cast_fp16")]; bool attn_weights_105_transpose_x_0 = const()[name = string("attn_weights_105_transpose_x_0"), val = bool(false)]; bool attn_weights_105_transpose_y_0 = const()[name = string("attn_weights_105_transpose_y_0"), val = bool(false)]; tensor attn_weights_105_cast_fp16 = matmul(transpose_x = attn_weights_105_transpose_x_0, transpose_y = attn_weights_105_transpose_y_0, x = var_2094_cast_fp16_1, y = var_2107_cast_fp16_1)[name = string("attn_weights_105_cast_fp16")]; fp16 _inversed_attn_weights_107_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_107_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_107_cast_fp16 = mul(x = attn_weights_105_cast_fp16, y = _inversed_attn_weights_107_y_0_to_fp16)[name = string("_inversed_attn_weights_107_cast_fp16")]; tensor attn_weights_109_cast_fp16 = add(x = _inversed_attn_weights_107_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_109_cast_fp16")]; int32 var_2126 = const()[name = string("op_2126"), val = int32(2)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_2126, x = attn_weights_109_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(true)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_111_cast_fp16, y = var_2104_cast_fp16_1)[name = string("attn_output_37_cast_fp16")]; int32 var_2134 = const()[name = string("op_2134"), val = int32(1)]; bool attn_output_39_interleave_0 = const()[name = string("attn_output_39_interleave_0"), val = bool(false)]; tensor attn_output_39_cast_fp16 = concat(axis = var_2134, interleave = attn_output_39_interleave_0, values = (var_2120_cast_fp16, attn_output_37_cast_fp16))[name = string("attn_output_39_cast_fp16")]; tensor var_2138_perm_0 = const()[name = string("op_2138_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1024, 1, 128])]; tensor var_2138_cast_fp16 = transpose(perm = var_2138_perm_0, x = attn_output_39_cast_fp16)[name = string("transpose_51")]; tensor x_113_cast_fp16 = reshape(shape = var_2143, x = var_2138_cast_fp16)[name = string("x_113_cast_fp16")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386112)))]; tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = var_2150_to_fp16, x = x_113_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor x_115_cast_fp16 = add(x = x_107_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("x_115_cast_fp16")]; int32 var_2162 = const()[name = string("op_2162"), val = int32(1)]; fp16 const_73_promoted_to_fp16 = const()[name = string("const_73_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2165_cast_fp16 = mul(x = x_115_cast_fp16, y = const_73_promoted_to_fp16)[name = string("op_2165_cast_fp16")]; bool x_117_interleave_0 = const()[name = string("x_117_interleave_0"), val = bool(false)]; tensor x_117_cast_fp16 = concat(axis = var_2162, interleave = x_117_interleave_0, values = (x_115_cast_fp16, var_2165_cast_fp16))[name = string("x_117_cast_fp16")]; tensor out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor([1])]; fp16 var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2175_to_fp16, x = x_117_cast_fp16)[name = string("out_79_cast_fp16")]; tensor layer_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192483328)))]; tensor out_81_cast_fp16 = mul(x = out_79_cast_fp16, y = layer_layers_6_post_attention_layernorm_weight_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_2181_split_sizes_0 = const()[name = string("op_2181_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2181_axis_0 = const()[name = string("op_2181_axis_0"), val = int32(1)]; tensor var_2181_cast_fp16_0, tensor var_2181_cast_fp16_1 = split(axis = var_2181_axis_0, split_sizes = var_2181_split_sizes_0, x = out_81_cast_fp16)[name = string("op_2181_cast_fp16")]; string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192487488)))]; tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = var_2186_to_fp16, x = var_2181_cast_fp16_0)[name = string("input_13_cast_fp16")]; tensor var_2197_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_2197_cast_fp16")]; string var_2202_pad_type_0 = const()[name = string("op_2202_pad_type_0"), val = string("valid")]; tensor var_2202_strides_0 = const()[name = string("op_2202_strides_0"), val = tensor([1, 1])]; tensor var_2202_pad_0 = const()[name = string("op_2202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2202_dilations_0 = const()[name = string("op_2202_dilations_0"), val = tensor([1, 1])]; int32 var_2202_groups_0 = const()[name = string("op_2202_groups_0"), val = int32(1)]; tensor var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200876160)))]; tensor var_2202_cast_fp16 = conv(dilations = var_2202_dilations_0, groups = var_2202_groups_0, pad = var_2202_pad_0, pad_type = var_2202_pad_type_0, strides = var_2202_strides_0, weight = var_2185_to_fp16, x = var_2181_cast_fp16_0)[name = string("op_2202_cast_fp16")]; tensor x_123_cast_fp16 = mul(x = var_2197_cast_fp16, y = var_2202_cast_fp16)[name = string("x_123_cast_fp16")]; string hidden_states_41_pad_type_0 = const()[name = string("hidden_states_41_pad_type_0"), val = string("valid")]; tensor hidden_states_41_strides_0 = const()[name = string("hidden_states_41_strides_0"), val = tensor([1, 1])]; tensor hidden_states_41_pad_0 = const()[name = string("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_41_dilations_0 = const()[name = string("hidden_states_41_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_41_groups_0 = const()[name = string("hidden_states_41_groups_0"), val = int32(1)]; tensor var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209264832)))]; tensor hidden_states_41_cast_fp16 = conv(dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = var_2184_to_fp16, x = x_123_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor x_125_cast_fp16 = add(x = x_115_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("x_125_cast_fp16")]; int32 var_2215 = const()[name = string("op_2215"), val = int32(1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_125_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool x_127_interleave_0 = const()[name = string("x_127_interleave_0"), val = bool(false)]; tensor x_127_cast_fp16 = concat(axis = var_2215, interleave = x_127_interleave_0, values = (x_125_cast_fp16, var_2218_cast_fp16))[name = string("x_127_cast_fp16")]; tensor out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor([1])]; fp16 var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2228_to_fp16, x = x_127_cast_fp16)[name = string("out_85_cast_fp16")]; tensor layer_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217653504)))]; tensor out_87_cast_fp16 = mul(x = out_85_cast_fp16, y = layer_layers_7_input_layernorm_weight_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_2234_split_sizes_0 = const()[name = string("op_2234_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2234_axis_0 = const()[name = string("op_2234_axis_0"), val = int32(1)]; tensor var_2234_cast_fp16_0, tensor var_2234_cast_fp16_1 = split(axis = var_2234_axis_0, split_sizes = var_2234_split_sizes_0, x = out_87_cast_fp16)[name = string("op_2234_cast_fp16")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217657664)))]; tensor query_states_29_cast_fp16 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = var_2256_to_fp16, x = var_2234_cast_fp16_0)[name = string("query_states_29_cast_fp16")]; string key_states_29_pad_type_0 = const()[name = string("key_states_29_pad_type_0"), val = string("valid")]; tensor key_states_29_strides_0 = const()[name = string("key_states_29_strides_0"), val = tensor([1, 1])]; tensor key_states_29_pad_0 = const()[name = string("key_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_29_dilations_0 = const()[name = string("key_states_29_dilations_0"), val = tensor([1, 1])]; int32 key_states_29_groups_0 = const()[name = string("key_states_29_groups_0"), val = int32(1)]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754880)))]; tensor key_states_29_cast_fp16 = conv(dilations = key_states_29_dilations_0, groups = key_states_29_groups_0, pad = key_states_29_pad_0, pad_type = key_states_29_pad_type_0, strides = key_states_29_strides_0, weight = var_2267_to_fp16, x = var_2234_cast_fp16_0)[name = string("key_states_29_cast_fp16")]; string value_states_29_pad_type_0 = const()[name = string("value_states_29_pad_type_0"), val = string("valid")]; tensor value_states_29_strides_0 = const()[name = string("value_states_29_strides_0"), val = tensor([1, 1])]; tensor value_states_29_pad_0 = const()[name = string("value_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_29_dilations_0 = const()[name = string("value_states_29_dilations_0"), val = tensor([1, 1])]; int32 value_states_29_groups_0 = const()[name = string("value_states_29_groups_0"), val = int32(1)]; tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220017088)))]; tensor value_states_29_cast_fp16 = conv(dilations = value_states_29_dilations_0, groups = value_states_29_groups_0, pad = value_states_29_pad_0, pad_type = value_states_29_pad_type_0, strides = value_states_29_strides_0, weight = var_2278_to_fp16, x = var_2234_cast_fp16_0)[name = string("value_states_29_cast_fp16")]; tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 16, 64, 128])]; tensor embed_29_cast_fp16 = reshape(shape = var_2286, x = query_states_29_cast_fp16)[name = string("embed_29_cast_fp16")]; tensor var_2290 = const()[name = string("op_2290"), val = tensor([1, 2, 64, 128])]; tensor var_2291_cast_fp16 = reshape(shape = var_2290, x = key_states_29_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor embed_31_perm_0 = const()[name = string("embed_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([1, 2, 64, 128])]; tensor var_2298_cast_fp16 = reshape(shape = var_2297, x = value_states_29_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor value_states_31_perm_0 = const()[name = string("value_states_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_cast_fp16 = mul(x = embed_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2302_cast_fp16")]; tensor var_2303_split_sizes_0 = const()[name = string("op_2303_split_sizes_0"), val = tensor([32, 32])]; int32 var_2303_axis_0 = const()[name = string("op_2303_axis_0"), val = int32(-2)]; tensor var_2303_cast_fp16_0, tensor var_2303_cast_fp16_1 = split(axis = var_2303_axis_0, split_sizes = var_2303_split_sizes_0, x = embed_29_cast_fp16)[name = string("op_2303_cast_fp16")]; fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2305_cast_fp16 = mul(x = var_2303_cast_fp16_1, y = const_77_promoted_to_fp16)[name = string("op_2305_cast_fp16")]; int32 var_2307 = const()[name = string("op_2307"), val = int32(-2)]; bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; tensor var_2308_cast_fp16 = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (var_2305_cast_fp16, var_2303_cast_fp16_0))[name = string("op_2308_cast_fp16")]; tensor var_2309_cast_fp16 = mul(x = var_2308_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_2302_cast_fp16, y = var_2309_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor embed_31_cast_fp16 = transpose(perm = embed_31_perm_0, x = var_2291_cast_fp16)[name = string("transpose_50")]; tensor var_2312_cast_fp16 = mul(x = embed_31_cast_fp16, y = cos_cast_fp16)[name = string("op_2312_cast_fp16")]; tensor var_2313_split_sizes_0 = const()[name = string("op_2313_split_sizes_0"), val = tensor([32, 32])]; int32 var_2313_axis_0 = const()[name = string("op_2313_axis_0"), val = int32(-1)]; tensor var_2313_cast_fp16_0, tensor var_2313_cast_fp16_1 = split(axis = var_2313_axis_0, split_sizes = var_2313_split_sizes_0, x = embed_31_cast_fp16)[name = string("op_2313_cast_fp16")]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2315_cast_fp16 = mul(x = var_2313_cast_fp16_1, y = const_78_promoted_to_fp16)[name = string("op_2315_cast_fp16")]; int32 var_2317 = const()[name = string("op_2317"), val = int32(-1)]; bool var_2318_interleave_0 = const()[name = string("op_2318_interleave_0"), val = bool(false)]; tensor var_2318_cast_fp16 = concat(axis = var_2317, interleave = var_2318_interleave_0, values = (var_2315_cast_fp16, var_2313_cast_fp16_0))[name = string("op_2318_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = var_2318_cast_fp16, y = sin_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor key_states_31_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2319_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor expand_dims_71 = const()[name = string("expand_dims_71"), val = tensor([7])]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_74 = const()[name = string("expand_dims_74"), val = tensor([8])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_71, expand_dims_72, position_id, concat_59_values3_0))[name = string("concat_59")]; tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_74, concat_60_values1_0, var_426, concat_60_values3_0))[name = string("concat_60")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = key_states_31_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_132_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_132")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_31_cast_fp16 = transpose(perm = value_states_31_perm_0, x = var_2298_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = value_states_31_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_133_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_133")]; tensor var_2362_begin_0 = const()[name = string("op_2362_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2362_end_0 = const()[name = string("op_2362_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2362_end_mask_0 = const()[name = string("op_2362_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = coreml_update_state_62)[name = string("op_2362_cast_fp16")]; tensor tile_14 = const()[name = string("tile_14"), val = tensor([1, 1])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = tile_14, x = var_2362_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2372_begin_0 = const()[name = string("op_2372_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2372_end_0 = const()[name = string("op_2372_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2372_end_mask_0 = const()[name = string("op_2372_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2372_cast_fp16 = slice_by_index(begin = var_2372_begin_0, end = var_2372_end_0, end_mask = var_2372_end_mask_0, x = coreml_update_state_63)[name = string("op_2372_cast_fp16")]; tensor tile_15 = const()[name = string("tile_15"), val = tensor([1, 1])]; int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(1)]; tensor var_2375_cast_fp16_0, tensor var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = tile_15, x = var_2372_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2378_split_sizes_0 = const()[name = string("op_2378_split_sizes_0"), val = tensor([8, 8])]; int32 var_2378_axis_0 = const()[name = string("op_2378_axis_0"), val = int32(1)]; tensor var_2378_cast_fp16_0, tensor var_2378_cast_fp16_1 = split(axis = var_2378_axis_0, split_sizes = var_2378_split_sizes_0, x = query_states_31_cast_fp16)[name = string("op_2378_cast_fp16")]; bool attn_weights_113_transpose_x_0 = const()[name = string("attn_weights_113_transpose_x_0"), val = bool(false)]; bool attn_weights_113_transpose_y_0 = const()[name = string("attn_weights_113_transpose_y_0"), val = bool(false)]; tensor attn_weights_113_cast_fp16 = matmul(transpose_x = attn_weights_113_transpose_x_0, transpose_y = attn_weights_113_transpose_y_0, x = var_2365_cast_fp16_0, y = var_2378_cast_fp16_0)[name = string("attn_weights_113_cast_fp16")]; fp16 _inversed_attn_weights_115_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_115_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_115_cast_fp16 = mul(x = attn_weights_113_cast_fp16, y = _inversed_attn_weights_115_y_0_to_fp16)[name = string("_inversed_attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = _inversed_attn_weights_115_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; int32 var_2385 = const()[name = string("op_2385"), val = int32(2)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_2385, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool var_2391_transpose_x_1 = const()[name = string("op_2391_transpose_x_1"), val = bool(true)]; bool var_2391_transpose_y_1 = const()[name = string("op_2391_transpose_y_1"), val = bool(false)]; tensor var_2391_cast_fp16 = matmul(transpose_x = var_2391_transpose_x_1, transpose_y = var_2391_transpose_y_1, x = attn_weights_119_cast_fp16, y = var_2375_cast_fp16_0)[name = string("op_2391_cast_fp16")]; bool attn_weights_121_transpose_x_0 = const()[name = string("attn_weights_121_transpose_x_0"), val = bool(false)]; bool attn_weights_121_transpose_y_0 = const()[name = string("attn_weights_121_transpose_y_0"), val = bool(false)]; tensor attn_weights_121_cast_fp16 = matmul(transpose_x = attn_weights_121_transpose_x_0, transpose_y = attn_weights_121_transpose_y_0, x = var_2365_cast_fp16_1, y = var_2378_cast_fp16_1)[name = string("attn_weights_121_cast_fp16")]; fp16 _inversed_attn_weights_123_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_123_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_123_cast_fp16 = mul(x = attn_weights_121_cast_fp16, y = _inversed_attn_weights_123_y_0_to_fp16)[name = string("_inversed_attn_weights_123_cast_fp16")]; tensor attn_weights_125_cast_fp16 = add(x = _inversed_attn_weights_123_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(2)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_2397, x = attn_weights_125_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_43_transpose_x_1 = const()[name = string("attn_output_43_transpose_x_1"), val = bool(true)]; bool attn_output_43_transpose_y_1 = const()[name = string("attn_output_43_transpose_y_1"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_1, transpose_y = attn_output_43_transpose_y_1, x = attn_weights_127_cast_fp16, y = var_2375_cast_fp16_1)[name = string("attn_output_43_cast_fp16")]; int32 var_2405 = const()[name = string("op_2405"), val = int32(1)]; bool attn_output_45_interleave_0 = const()[name = string("attn_output_45_interleave_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = concat(axis = var_2405, interleave = attn_output_45_interleave_0, values = (var_2391_cast_fp16, attn_output_43_cast_fp16))[name = string("attn_output_45_cast_fp16")]; tensor var_2409_perm_0 = const()[name = string("op_2409_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2414 = const()[name = string("op_2414"), val = tensor([1, 1024, 1, 128])]; tensor var_2409_cast_fp16 = transpose(perm = var_2409_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor x_131_cast_fp16 = reshape(shape = var_2414, x = var_2409_cast_fp16)[name = string("x_131_cast_fp16")]; string hidden_states_45_pad_type_0 = const()[name = string("hidden_states_45_pad_type_0"), val = string("valid")]; tensor hidden_states_45_strides_0 = const()[name = string("hidden_states_45_strides_0"), val = tensor([1, 1])]; tensor hidden_states_45_pad_0 = const()[name = string("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_45_dilations_0 = const()[name = string("hidden_states_45_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_45_groups_0 = const()[name = string("hidden_states_45_groups_0"), val = int32(1)]; tensor var_2421_to_fp16 = const()[name = string("op_2421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279296)))]; tensor hidden_states_45_cast_fp16 = conv(dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = var_2421_to_fp16, x = x_131_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor x_133_cast_fp16 = add(x = x_125_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("x_133_cast_fp16")]; int32 var_2433 = const()[name = string("op_2433"), val = int32(1)]; fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2436_cast_fp16 = mul(x = x_133_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; bool x_135_interleave_0 = const()[name = string("x_135_interleave_0"), val = bool(false)]; tensor x_135_cast_fp16 = concat(axis = var_2433, interleave = x_135_interleave_0, values = (x_133_cast_fp16, var_2436_cast_fp16))[name = string("x_135_cast_fp16")]; tensor out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor([1])]; fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2446_to_fp16, x = x_135_cast_fp16)[name = string("out_91_cast_fp16")]; tensor layer_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376512)))]; tensor out_93_cast_fp16 = mul(x = out_91_cast_fp16, y = layer_layers_7_post_attention_layernorm_weight_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_2452_split_sizes_0 = const()[name = string("op_2452_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2452_axis_0 = const()[name = string("op_2452_axis_0"), val = int32(1)]; tensor var_2452_cast_fp16_0, tensor var_2452_cast_fp16_1 = split(axis = var_2452_axis_0, split_sizes = var_2452_split_sizes_0, x = out_93_cast_fp16)[name = string("op_2452_cast_fp16")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222380672)))]; tensor input_15_cast_fp16 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = var_2457_to_fp16, x = var_2452_cast_fp16_0)[name = string("input_15_cast_fp16")]; tensor var_2468_cast_fp16 = silu(x = input_15_cast_fp16)[name = string("op_2468_cast_fp16")]; string var_2473_pad_type_0 = const()[name = string("op_2473_pad_type_0"), val = string("valid")]; tensor var_2473_strides_0 = const()[name = string("op_2473_strides_0"), val = tensor([1, 1])]; tensor var_2473_pad_0 = const()[name = string("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2473_dilations_0 = const()[name = string("op_2473_dilations_0"), val = tensor([1, 1])]; int32 var_2473_groups_0 = const()[name = string("op_2473_groups_0"), val = int32(1)]; tensor var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230769344)))]; tensor var_2473_cast_fp16 = conv(dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = var_2456_to_fp16, x = var_2452_cast_fp16_0)[name = string("op_2473_cast_fp16")]; tensor x_141_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2473_cast_fp16)[name = string("x_141_cast_fp16")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239158016)))]; tensor hidden_states_47_cast_fp16 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = var_2455_to_fp16, x = x_141_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor x_143_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("x_143_cast_fp16")]; int32 var_2486 = const()[name = string("op_2486"), val = int32(1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2489_cast_fp16 = mul(x = x_143_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; bool x_145_interleave_0 = const()[name = string("x_145_interleave_0"), val = bool(false)]; tensor x_145_cast_fp16 = concat(axis = var_2486, interleave = x_145_interleave_0, values = (x_143_cast_fp16, var_2489_cast_fp16))[name = string("x_145_cast_fp16")]; tensor out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor([1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_2499_to_fp16, x = x_145_cast_fp16)[name = string("out_97_cast_fp16")]; tensor layer_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247546688)))]; tensor out_99_cast_fp16 = mul(x = out_97_cast_fp16, y = layer_layers_8_input_layernorm_weight_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_2505_split_sizes_0 = const()[name = string("op_2505_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2505_axis_0 = const()[name = string("op_2505_axis_0"), val = int32(1)]; tensor var_2505_cast_fp16_0, tensor var_2505_cast_fp16_1 = split(axis = var_2505_axis_0, split_sizes = var_2505_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2505_cast_fp16")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247550848)))]; tensor query_states_33_cast_fp16 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = var_2527_to_fp16, x = var_2505_cast_fp16_0)[name = string("query_states_33_cast_fp16")]; string key_states_33_pad_type_0 = const()[name = string("key_states_33_pad_type_0"), val = string("valid")]; tensor key_states_33_strides_0 = const()[name = string("key_states_33_strides_0"), val = tensor([1, 1])]; tensor key_states_33_pad_0 = const()[name = string("key_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_33_dilations_0 = const()[name = string("key_states_33_dilations_0"), val = tensor([1, 1])]; int32 key_states_33_groups_0 = const()[name = string("key_states_33_groups_0"), val = int32(1)]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249648064)))]; tensor key_states_33_cast_fp16 = conv(dilations = key_states_33_dilations_0, groups = key_states_33_groups_0, pad = key_states_33_pad_0, pad_type = key_states_33_pad_type_0, strides = key_states_33_strides_0, weight = var_2538_to_fp16, x = var_2505_cast_fp16_0)[name = string("key_states_33_cast_fp16")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249910272)))]; tensor value_states_33_cast_fp16 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = var_2549_to_fp16, x = var_2505_cast_fp16_0)[name = string("value_states_33_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 16, 64, 128])]; tensor embed_33_cast_fp16 = reshape(shape = var_2557, x = query_states_33_cast_fp16)[name = string("embed_33_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 2, 64, 128])]; tensor var_2562_cast_fp16 = reshape(shape = var_2561, x = key_states_33_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor embed_35_perm_0 = const()[name = string("embed_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([1, 2, 64, 128])]; tensor var_2569_cast_fp16 = reshape(shape = var_2568, x = value_states_33_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor value_states_35_perm_0 = const()[name = string("value_states_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2573_cast_fp16 = mul(x = embed_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor var_2574_split_sizes_0 = const()[name = string("op_2574_split_sizes_0"), val = tensor([32, 32])]; int32 var_2574_axis_0 = const()[name = string("op_2574_axis_0"), val = int32(-2)]; tensor var_2574_cast_fp16_0, tensor var_2574_cast_fp16_1 = split(axis = var_2574_axis_0, split_sizes = var_2574_split_sizes_0, x = embed_33_cast_fp16)[name = string("op_2574_cast_fp16")]; fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2576_cast_fp16 = mul(x = var_2574_cast_fp16_1, y = const_87_promoted_to_fp16)[name = string("op_2576_cast_fp16")]; int32 var_2578 = const()[name = string("op_2578"), val = int32(-2)]; bool var_2579_interleave_0 = const()[name = string("op_2579_interleave_0"), val = bool(false)]; tensor var_2579_cast_fp16 = concat(axis = var_2578, interleave = var_2579_interleave_0, values = (var_2576_cast_fp16, var_2574_cast_fp16_0))[name = string("op_2579_cast_fp16")]; tensor var_2580_cast_fp16 = mul(x = var_2579_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2580_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor embed_35_cast_fp16 = transpose(perm = embed_35_perm_0, x = var_2562_cast_fp16)[name = string("transpose_47")]; tensor var_2583_cast_fp16 = mul(x = embed_35_cast_fp16, y = cos_cast_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2584_split_sizes_0 = const()[name = string("op_2584_split_sizes_0"), val = tensor([32, 32])]; int32 var_2584_axis_0 = const()[name = string("op_2584_axis_0"), val = int32(-1)]; tensor var_2584_cast_fp16_0, tensor var_2584_cast_fp16_1 = split(axis = var_2584_axis_0, split_sizes = var_2584_split_sizes_0, x = embed_35_cast_fp16)[name = string("op_2584_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2586_cast_fp16 = mul(x = var_2584_cast_fp16_1, y = const_88_promoted_to_fp16)[name = string("op_2586_cast_fp16")]; int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; tensor var_2589_cast_fp16 = concat(axis = var_2588, interleave = var_2589_interleave_0, values = (var_2586_cast_fp16, var_2584_cast_fp16_0))[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_cast_fp16)[name = string("op_2590_cast_fp16")]; tensor key_states_35_cast_fp16 = add(x = var_2583_cast_fp16, y = var_2590_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([8])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([9])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_81, expand_dims_82, position_id, concat_67_values3_0))[name = string("concat_67")]; tensor concat_68_values1_0 = const()[name = string("concat_68_values1_0"), val = tensor([0])]; tensor concat_68_values3_0 = const()[name = string("concat_68_values3_0"), val = tensor([0])]; int32 concat_68_axis_0 = const()[name = string("concat_68_axis_0"), val = int32(0)]; bool concat_68_interleave_0 = const()[name = string("concat_68_interleave_0"), val = bool(false)]; tensor concat_68 = concat(axis = concat_68_axis_0, interleave = concat_68_interleave_0, values = (expand_dims_84, concat_68_values1_0, var_426, concat_68_values3_0))[name = string("concat_68")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = key_states_35_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_134_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_134")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35_cast_fp16 = transpose(perm = value_states_35_perm_0, x = var_2569_cast_fp16)[name = string("transpose_46")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = value_states_35_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_135_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_135")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_64)[name = string("op_2633_cast_fp16")]; tensor tile_16 = const()[name = string("tile_16"), val = tensor([1, 1])]; int32 var_2636_axis_0 = const()[name = string("op_2636_axis_0"), val = int32(1)]; tensor var_2636_cast_fp16_0, tensor var_2636_cast_fp16_1 = split(axis = var_2636_axis_0, split_sizes = tile_16, x = var_2633_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor var_2643_begin_0 = const()[name = string("op_2643_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2643_end_0 = const()[name = string("op_2643_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2643_end_mask_0 = const()[name = string("op_2643_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = coreml_update_state_65)[name = string("op_2643_cast_fp16")]; tensor tile_17 = const()[name = string("tile_17"), val = tensor([1, 1])]; int32 var_2646_axis_0 = const()[name = string("op_2646_axis_0"), val = int32(1)]; tensor var_2646_cast_fp16_0, tensor var_2646_cast_fp16_1 = split(axis = var_2646_axis_0, split_sizes = tile_17, x = var_2643_cast_fp16)[name = string("op_2646_cast_fp16")]; tensor var_2649_split_sizes_0 = const()[name = string("op_2649_split_sizes_0"), val = tensor([8, 8])]; int32 var_2649_axis_0 = const()[name = string("op_2649_axis_0"), val = int32(1)]; tensor var_2649_cast_fp16_0, tensor var_2649_cast_fp16_1 = split(axis = var_2649_axis_0, split_sizes = var_2649_split_sizes_0, x = query_states_35_cast_fp16)[name = string("op_2649_cast_fp16")]; bool attn_weights_129_transpose_x_0 = const()[name = string("attn_weights_129_transpose_x_0"), val = bool(false)]; bool attn_weights_129_transpose_y_0 = const()[name = string("attn_weights_129_transpose_y_0"), val = bool(false)]; tensor attn_weights_129_cast_fp16 = matmul(transpose_x = attn_weights_129_transpose_x_0, transpose_y = attn_weights_129_transpose_y_0, x = var_2636_cast_fp16_0, y = var_2649_cast_fp16_0)[name = string("attn_weights_129_cast_fp16")]; fp16 _inversed_attn_weights_131_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_131_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_131_cast_fp16 = mul(x = attn_weights_129_cast_fp16, y = _inversed_attn_weights_131_y_0_to_fp16)[name = string("_inversed_attn_weights_131_cast_fp16")]; tensor attn_weights_133_cast_fp16 = add(x = _inversed_attn_weights_131_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_133_cast_fp16")]; int32 var_2656 = const()[name = string("op_2656"), val = int32(2)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_2656, x = attn_weights_133_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool var_2662_transpose_x_1 = const()[name = string("op_2662_transpose_x_1"), val = bool(true)]; bool var_2662_transpose_y_1 = const()[name = string("op_2662_transpose_y_1"), val = bool(false)]; tensor var_2662_cast_fp16 = matmul(transpose_x = var_2662_transpose_x_1, transpose_y = var_2662_transpose_y_1, x = attn_weights_135_cast_fp16, y = var_2646_cast_fp16_0)[name = string("op_2662_cast_fp16")]; bool attn_weights_137_transpose_x_0 = const()[name = string("attn_weights_137_transpose_x_0"), val = bool(false)]; bool attn_weights_137_transpose_y_0 = const()[name = string("attn_weights_137_transpose_y_0"), val = bool(false)]; tensor attn_weights_137_cast_fp16 = matmul(transpose_x = attn_weights_137_transpose_x_0, transpose_y = attn_weights_137_transpose_y_0, x = var_2636_cast_fp16_1, y = var_2649_cast_fp16_1)[name = string("attn_weights_137_cast_fp16")]; fp16 _inversed_attn_weights_139_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_139_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_139_cast_fp16 = mul(x = attn_weights_137_cast_fp16, y = _inversed_attn_weights_139_y_0_to_fp16)[name = string("_inversed_attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = _inversed_attn_weights_139_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; int32 var_2668 = const()[name = string("op_2668"), val = int32(2)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_2668, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(true)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_143_cast_fp16, y = var_2646_cast_fp16_1)[name = string("attn_output_49_cast_fp16")]; int32 var_2676 = const()[name = string("op_2676"), val = int32(1)]; bool attn_output_51_interleave_0 = const()[name = string("attn_output_51_interleave_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = concat(axis = var_2676, interleave = attn_output_51_interleave_0, values = (var_2662_cast_fp16, attn_output_49_cast_fp16))[name = string("attn_output_51_cast_fp16")]; tensor var_2680_perm_0 = const()[name = string("op_2680_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 1024, 1, 128])]; tensor var_2680_cast_fp16 = transpose(perm = var_2680_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_45")]; tensor x_149_cast_fp16 = reshape(shape = var_2685, x = var_2680_cast_fp16)[name = string("x_149_cast_fp16")]; string hidden_states_51_pad_type_0 = const()[name = string("hidden_states_51_pad_type_0"), val = string("valid")]; tensor hidden_states_51_strides_0 = const()[name = string("hidden_states_51_strides_0"), val = tensor([1, 1])]; tensor hidden_states_51_pad_0 = const()[name = string("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_51_dilations_0 = const()[name = string("hidden_states_51_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_51_groups_0 = const()[name = string("hidden_states_51_groups_0"), val = int32(1)]; tensor var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250172480)))]; tensor hidden_states_51_cast_fp16 = conv(dilations = hidden_states_51_dilations_0, groups = hidden_states_51_groups_0, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = hidden_states_51_strides_0, weight = var_2692_to_fp16, x = x_149_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor x_151_cast_fp16 = add(x = x_143_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("x_151_cast_fp16")]; int32 var_2704 = const()[name = string("op_2704"), val = int32(1)]; fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2707_cast_fp16 = mul(x = x_151_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_2707_cast_fp16")]; bool x_153_interleave_0 = const()[name = string("x_153_interleave_0"), val = bool(false)]; tensor x_153_cast_fp16 = concat(axis = var_2704, interleave = x_153_interleave_0, values = (x_151_cast_fp16, var_2707_cast_fp16))[name = string("x_153_cast_fp16")]; tensor out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor([1])]; fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_2717_to_fp16, x = x_153_cast_fp16)[name = string("out_103_cast_fp16")]; tensor layer_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252269696)))]; tensor out_105_cast_fp16 = mul(x = out_103_cast_fp16, y = layer_layers_8_post_attention_layernorm_weight_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2723_split_sizes_0 = const()[name = string("op_2723_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2723_axis_0 = const()[name = string("op_2723_axis_0"), val = int32(1)]; tensor var_2723_cast_fp16_0, tensor var_2723_cast_fp16_1 = split(axis = var_2723_axis_0, split_sizes = var_2723_split_sizes_0, x = out_105_cast_fp16)[name = string("op_2723_cast_fp16")]; string input_17_pad_type_0 = const()[name = string("input_17_pad_type_0"), val = string("valid")]; tensor input_17_strides_0 = const()[name = string("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = string("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = string("input_17_dilations_0"), val = tensor([1, 1])]; int32 input_17_groups_0 = const()[name = string("input_17_groups_0"), val = int32(1)]; tensor var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252273856)))]; tensor input_17_cast_fp16 = conv(dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = var_2728_to_fp16, x = var_2723_cast_fp16_0)[name = string("input_17_cast_fp16")]; tensor var_2739_cast_fp16 = silu(x = input_17_cast_fp16)[name = string("op_2739_cast_fp16")]; string var_2744_pad_type_0 = const()[name = string("op_2744_pad_type_0"), val = string("valid")]; tensor var_2744_strides_0 = const()[name = string("op_2744_strides_0"), val = tensor([1, 1])]; tensor var_2744_pad_0 = const()[name = string("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2744_dilations_0 = const()[name = string("op_2744_dilations_0"), val = tensor([1, 1])]; int32 var_2744_groups_0 = const()[name = string("op_2744_groups_0"), val = int32(1)]; tensor var_2727_to_fp16 = const()[name = string("op_2727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260662528)))]; tensor var_2744_cast_fp16 = conv(dilations = var_2744_dilations_0, groups = var_2744_groups_0, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2744_strides_0, weight = var_2727_to_fp16, x = var_2723_cast_fp16_0)[name = string("op_2744_cast_fp16")]; tensor x_159_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2744_cast_fp16)[name = string("x_159_cast_fp16")]; string hidden_states_53_pad_type_0 = const()[name = string("hidden_states_53_pad_type_0"), val = string("valid")]; tensor hidden_states_53_strides_0 = const()[name = string("hidden_states_53_strides_0"), val = tensor([1, 1])]; tensor hidden_states_53_pad_0 = const()[name = string("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_53_dilations_0 = const()[name = string("hidden_states_53_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_53_groups_0 = const()[name = string("hidden_states_53_groups_0"), val = int32(1)]; tensor var_2726_to_fp16 = const()[name = string("op_2726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269051200)))]; tensor hidden_states_53_cast_fp16 = conv(dilations = hidden_states_53_dilations_0, groups = hidden_states_53_groups_0, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = hidden_states_53_strides_0, weight = var_2726_to_fp16, x = x_159_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_151_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_2757 = const()[name = string("op_2757"), val = int32(1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2760_cast_fp16 = mul(x = x_161_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_2760_cast_fp16")]; bool x_163_interleave_0 = const()[name = string("x_163_interleave_0"), val = bool(false)]; tensor x_163_cast_fp16 = concat(axis = var_2757, interleave = x_163_interleave_0, values = (x_161_cast_fp16, var_2760_cast_fp16))[name = string("x_163_cast_fp16")]; tensor out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor([1])]; fp16 var_2770_to_fp16 = const()[name = string("op_2770_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_2770_to_fp16, x = x_163_cast_fp16)[name = string("out_109_cast_fp16")]; tensor layer_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277439872)))]; tensor out_111_cast_fp16 = mul(x = out_109_cast_fp16, y = layer_layers_9_input_layernorm_weight_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_2776_split_sizes_0 = const()[name = string("op_2776_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2776_axis_0 = const()[name = string("op_2776_axis_0"), val = int32(1)]; tensor var_2776_cast_fp16_0, tensor var_2776_cast_fp16_1 = split(axis = var_2776_axis_0, split_sizes = var_2776_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2776_cast_fp16")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277444032)))]; tensor query_states_37_cast_fp16 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = var_2798_to_fp16, x = var_2776_cast_fp16_0)[name = string("query_states_37_cast_fp16")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279541248)))]; tensor key_states_37_cast_fp16 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = var_2809_to_fp16, x = var_2776_cast_fp16_0)[name = string("key_states_37_cast_fp16")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor var_2820_to_fp16 = const()[name = string("op_2820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279803456)))]; tensor value_states_37_cast_fp16 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = var_2820_to_fp16, x = var_2776_cast_fp16_0)[name = string("value_states_37_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 16, 64, 128])]; tensor embed_37_cast_fp16 = reshape(shape = var_2828, x = query_states_37_cast_fp16)[name = string("embed_37_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 64, 128])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = key_states_37_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor embed_39_perm_0 = const()[name = string("embed_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([1, 2, 64, 128])]; tensor var_2840_cast_fp16 = reshape(shape = var_2839, x = value_states_37_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor value_states_39_perm_0 = const()[name = string("value_states_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2844_cast_fp16 = mul(x = embed_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor var_2845_split_sizes_0 = const()[name = string("op_2845_split_sizes_0"), val = tensor([32, 32])]; int32 var_2845_axis_0 = const()[name = string("op_2845_axis_0"), val = int32(-2)]; tensor var_2845_cast_fp16_0, tensor var_2845_cast_fp16_1 = split(axis = var_2845_axis_0, split_sizes = var_2845_split_sizes_0, x = embed_37_cast_fp16)[name = string("op_2845_cast_fp16")]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = var_2845_cast_fp16_1, y = const_97_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; int32 var_2849 = const()[name = string("op_2849"), val = int32(-2)]; bool var_2850_interleave_0 = const()[name = string("op_2850_interleave_0"), val = bool(false)]; tensor var_2850_cast_fp16 = concat(axis = var_2849, interleave = var_2850_interleave_0, values = (var_2847_cast_fp16, var_2845_cast_fp16_0))[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = mul(x = var_2850_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2851_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor embed_39_cast_fp16 = transpose(perm = embed_39_perm_0, x = var_2833_cast_fp16)[name = string("transpose_44")]; tensor var_2854_cast_fp16 = mul(x = embed_39_cast_fp16, y = cos_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2855_split_sizes_0 = const()[name = string("op_2855_split_sizes_0"), val = tensor([32, 32])]; int32 var_2855_axis_0 = const()[name = string("op_2855_axis_0"), val = int32(-1)]; tensor var_2855_cast_fp16_0, tensor var_2855_cast_fp16_1 = split(axis = var_2855_axis_0, split_sizes = var_2855_split_sizes_0, x = embed_39_cast_fp16)[name = string("op_2855_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2857_cast_fp16 = mul(x = var_2855_cast_fp16_1, y = const_98_promoted_to_fp16)[name = string("op_2857_cast_fp16")]; int32 var_2859 = const()[name = string("op_2859"), val = int32(-1)]; bool var_2860_interleave_0 = const()[name = string("op_2860_interleave_0"), val = bool(false)]; tensor var_2860_cast_fp16 = concat(axis = var_2859, interleave = var_2860_interleave_0, values = (var_2857_cast_fp16, var_2855_cast_fp16_0))[name = string("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = mul(x = var_2860_cast_fp16, y = sin_cast_fp16)[name = string("op_2861_cast_fp16")]; tensor key_states_39_cast_fp16 = add(x = var_2854_cast_fp16, y = var_2861_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([9])]; tensor expand_dims_92 = const()[name = string("expand_dims_92"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([10])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_91, expand_dims_92, position_id, concat_75_values3_0))[name = string("concat_75")]; tensor concat_76_values1_0 = const()[name = string("concat_76_values1_0"), val = tensor([0])]; tensor concat_76_values3_0 = const()[name = string("concat_76_values3_0"), val = tensor([0])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_94, concat_76_values1_0, var_426, concat_76_values3_0))[name = string("concat_76")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = key_states_39_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_136_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_136")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39_cast_fp16 = transpose(perm = value_states_39_perm_0, x = var_2840_cast_fp16)[name = string("transpose_43")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = value_states_39_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_137_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_137")]; tensor var_2904_begin_0 = const()[name = string("op_2904_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = string("op_2904_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2904_end_mask_0 = const()[name = string("op_2904_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = coreml_update_state_66)[name = string("op_2904_cast_fp16")]; tensor tile_18 = const()[name = string("tile_18"), val = tensor([1, 1])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = tile_18, x = var_2904_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2914_begin_0 = const()[name = string("op_2914_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = string("op_2914_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2914_end_mask_0 = const()[name = string("op_2914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = coreml_update_state_67)[name = string("op_2914_cast_fp16")]; tensor tile_19 = const()[name = string("tile_19"), val = tensor([1, 1])]; int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(1)]; tensor var_2917_cast_fp16_0, tensor var_2917_cast_fp16_1 = split(axis = var_2917_axis_0, split_sizes = tile_19, x = var_2914_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor var_2920_split_sizes_0 = const()[name = string("op_2920_split_sizes_0"), val = tensor([8, 8])]; int32 var_2920_axis_0 = const()[name = string("op_2920_axis_0"), val = int32(1)]; tensor var_2920_cast_fp16_0, tensor var_2920_cast_fp16_1 = split(axis = var_2920_axis_0, split_sizes = var_2920_split_sizes_0, x = query_states_39_cast_fp16)[name = string("op_2920_cast_fp16")]; bool attn_weights_145_transpose_x_0 = const()[name = string("attn_weights_145_transpose_x_0"), val = bool(false)]; bool attn_weights_145_transpose_y_0 = const()[name = string("attn_weights_145_transpose_y_0"), val = bool(false)]; tensor attn_weights_145_cast_fp16 = matmul(transpose_x = attn_weights_145_transpose_x_0, transpose_y = attn_weights_145_transpose_y_0, x = var_2907_cast_fp16_0, y = var_2920_cast_fp16_0)[name = string("attn_weights_145_cast_fp16")]; fp16 _inversed_attn_weights_147_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_147_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_147_cast_fp16 = mul(x = attn_weights_145_cast_fp16, y = _inversed_attn_weights_147_y_0_to_fp16)[name = string("_inversed_attn_weights_147_cast_fp16")]; tensor attn_weights_149_cast_fp16 = add(x = _inversed_attn_weights_147_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; int32 var_2927 = const()[name = string("op_2927"), val = int32(2)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_2927, x = attn_weights_149_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool var_2933_transpose_x_1 = const()[name = string("op_2933_transpose_x_1"), val = bool(true)]; bool var_2933_transpose_y_1 = const()[name = string("op_2933_transpose_y_1"), val = bool(false)]; tensor var_2933_cast_fp16 = matmul(transpose_x = var_2933_transpose_x_1, transpose_y = var_2933_transpose_y_1, x = attn_weights_151_cast_fp16, y = var_2917_cast_fp16_0)[name = string("op_2933_cast_fp16")]; bool attn_weights_153_transpose_x_0 = const()[name = string("attn_weights_153_transpose_x_0"), val = bool(false)]; bool attn_weights_153_transpose_y_0 = const()[name = string("attn_weights_153_transpose_y_0"), val = bool(false)]; tensor attn_weights_153_cast_fp16 = matmul(transpose_x = attn_weights_153_transpose_x_0, transpose_y = attn_weights_153_transpose_y_0, x = var_2907_cast_fp16_1, y = var_2920_cast_fp16_1)[name = string("attn_weights_153_cast_fp16")]; fp16 _inversed_attn_weights_155_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_155_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_155_cast_fp16 = mul(x = attn_weights_153_cast_fp16, y = _inversed_attn_weights_155_y_0_to_fp16)[name = string("_inversed_attn_weights_155_cast_fp16")]; tensor attn_weights_157_cast_fp16 = add(x = _inversed_attn_weights_155_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_157_cast_fp16")]; int32 var_2939 = const()[name = string("op_2939"), val = int32(2)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_2939, x = attn_weights_157_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_55_transpose_x_1 = const()[name = string("attn_output_55_transpose_x_1"), val = bool(true)]; bool attn_output_55_transpose_y_1 = const()[name = string("attn_output_55_transpose_y_1"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_1, transpose_y = attn_output_55_transpose_y_1, x = attn_weights_159_cast_fp16, y = var_2917_cast_fp16_1)[name = string("attn_output_55_cast_fp16")]; int32 var_2947 = const()[name = string("op_2947"), val = int32(1)]; bool attn_output_57_interleave_0 = const()[name = string("attn_output_57_interleave_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = concat(axis = var_2947, interleave = attn_output_57_interleave_0, values = (var_2933_cast_fp16, attn_output_55_cast_fp16))[name = string("attn_output_57_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1024, 1, 128])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; tensor x_167_cast_fp16 = reshape(shape = var_2956, x = var_2951_cast_fp16)[name = string("x_167_cast_fp16")]; string hidden_states_57_pad_type_0 = const()[name = string("hidden_states_57_pad_type_0"), val = string("valid")]; tensor hidden_states_57_strides_0 = const()[name = string("hidden_states_57_strides_0"), val = tensor([1, 1])]; tensor hidden_states_57_pad_0 = const()[name = string("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_57_dilations_0 = const()[name = string("hidden_states_57_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_57_groups_0 = const()[name = string("hidden_states_57_groups_0"), val = int32(1)]; tensor var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280065664)))]; tensor hidden_states_57_cast_fp16 = conv(dilations = hidden_states_57_dilations_0, groups = hidden_states_57_groups_0, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = hidden_states_57_strides_0, weight = var_2963_to_fp16, x = x_167_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor x_169_cast_fp16 = add(x = x_161_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("x_169_cast_fp16")]; int32 var_2975 = const()[name = string("op_2975"), val = int32(1)]; fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2978_cast_fp16 = mul(x = x_169_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; bool x_171_interleave_0 = const()[name = string("x_171_interleave_0"), val = bool(false)]; tensor x_171_cast_fp16 = concat(axis = var_2975, interleave = x_171_interleave_0, values = (x_169_cast_fp16, var_2978_cast_fp16))[name = string("x_171_cast_fp16")]; tensor out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor([1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_2988_to_fp16, x = x_171_cast_fp16)[name = string("out_115_cast_fp16")]; tensor layer_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282162880)))]; tensor out_117_cast_fp16 = mul(x = out_115_cast_fp16, y = layer_layers_9_post_attention_layernorm_weight_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2994_split_sizes_0 = const()[name = string("op_2994_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2994_axis_0 = const()[name = string("op_2994_axis_0"), val = int32(1)]; tensor var_2994_cast_fp16_0, tensor var_2994_cast_fp16_1 = split(axis = var_2994_axis_0, split_sizes = var_2994_split_sizes_0, x = out_117_cast_fp16)[name = string("op_2994_cast_fp16")]; string input_19_pad_type_0 = const()[name = string("input_19_pad_type_0"), val = string("valid")]; tensor input_19_strides_0 = const()[name = string("input_19_strides_0"), val = tensor([1, 1])]; tensor input_19_pad_0 = const()[name = string("input_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_19_dilations_0 = const()[name = string("input_19_dilations_0"), val = tensor([1, 1])]; int32 input_19_groups_0 = const()[name = string("input_19_groups_0"), val = int32(1)]; tensor var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282167040)))]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = var_2999_to_fp16, x = var_2994_cast_fp16_0)[name = string("input_19_cast_fp16")]; tensor var_3010_cast_fp16 = silu(x = input_19_cast_fp16)[name = string("op_3010_cast_fp16")]; string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1, 1])]; tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1, 1])]; int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; tensor var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290555712)))]; tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = var_2998_to_fp16, x = var_2994_cast_fp16_0)[name = string("op_3015_cast_fp16")]; tensor x_177_cast_fp16 = mul(x = var_3010_cast_fp16, y = var_3015_cast_fp16)[name = string("x_177_cast_fp16")]; string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; tensor var_2997_to_fp16 = const()[name = string("op_2997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944384)))]; tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = var_2997_to_fp16, x = x_177_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor x_179_cast_fp16 = add(x = x_169_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("x_179_cast_fp16")]; int32 var_3028 = const()[name = string("op_3028"), val = int32(1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3031_cast_fp16 = mul(x = x_179_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3031_cast_fp16")]; bool x_181_interleave_0 = const()[name = string("x_181_interleave_0"), val = bool(false)]; tensor x_181_cast_fp16 = concat(axis = var_3028, interleave = x_181_interleave_0, values = (x_179_cast_fp16, var_3031_cast_fp16))[name = string("x_181_cast_fp16")]; tensor out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor([1])]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_3041_to_fp16, x = x_181_cast_fp16)[name = string("out_121_cast_fp16")]; tensor layer_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307333056)))]; tensor out_123_cast_fp16 = mul(x = out_121_cast_fp16, y = layer_layers_10_input_layernorm_weight_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_3047_split_sizes_0 = const()[name = string("op_3047_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3047_axis_0 = const()[name = string("op_3047_axis_0"), val = int32(1)]; tensor var_3047_cast_fp16_0, tensor var_3047_cast_fp16_1 = split(axis = var_3047_axis_0, split_sizes = var_3047_split_sizes_0, x = out_123_cast_fp16)[name = string("op_3047_cast_fp16")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307337216)))]; tensor query_states_41_cast_fp16 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = var_3069_to_fp16, x = var_3047_cast_fp16_0)[name = string("query_states_41_cast_fp16")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309434432)))]; tensor key_states_41_cast_fp16 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = var_3080_to_fp16, x = var_3047_cast_fp16_0)[name = string("key_states_41_cast_fp16")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309696640)))]; tensor value_states_41_cast_fp16 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = var_3091_to_fp16, x = var_3047_cast_fp16_0)[name = string("value_states_41_cast_fp16")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([1, 16, 64, 128])]; tensor embed_41_cast_fp16 = reshape(shape = var_3099, x = query_states_41_cast_fp16)[name = string("embed_41_cast_fp16")]; tensor var_3103 = const()[name = string("op_3103"), val = tensor([1, 2, 64, 128])]; tensor var_3104_cast_fp16 = reshape(shape = var_3103, x = key_states_41_cast_fp16)[name = string("op_3104_cast_fp16")]; tensor embed_43_perm_0 = const()[name = string("embed_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 2, 64, 128])]; tensor var_3111_cast_fp16 = reshape(shape = var_3110, x = value_states_41_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor value_states_43_perm_0 = const()[name = string("value_states_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3115_cast_fp16 = mul(x = embed_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3115_cast_fp16")]; tensor var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor([32, 32])]; int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-2)]; tensor var_3116_cast_fp16_0, tensor var_3116_cast_fp16_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = embed_41_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3118_cast_fp16 = mul(x = var_3116_cast_fp16_1, y = const_107_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; int32 var_3120 = const()[name = string("op_3120"), val = int32(-2)]; bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)]; tensor var_3121_cast_fp16 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118_cast_fp16, var_3116_cast_fp16_0))[name = string("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = mul(x = var_3121_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3122_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor embed_43_cast_fp16 = transpose(perm = embed_43_perm_0, x = var_3104_cast_fp16)[name = string("transpose_41")]; tensor var_3125_cast_fp16 = mul(x = embed_43_cast_fp16, y = cos_cast_fp16)[name = string("op_3125_cast_fp16")]; tensor var_3126_split_sizes_0 = const()[name = string("op_3126_split_sizes_0"), val = tensor([32, 32])]; int32 var_3126_axis_0 = const()[name = string("op_3126_axis_0"), val = int32(-1)]; tensor var_3126_cast_fp16_0, tensor var_3126_cast_fp16_1 = split(axis = var_3126_axis_0, split_sizes = var_3126_split_sizes_0, x = embed_43_cast_fp16)[name = string("op_3126_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3128_cast_fp16 = mul(x = var_3126_cast_fp16_1, y = const_108_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; bool var_3131_interleave_0 = const()[name = string("op_3131_interleave_0"), val = bool(false)]; tensor var_3131_cast_fp16 = concat(axis = var_3130, interleave = var_3131_interleave_0, values = (var_3128_cast_fp16, var_3126_cast_fp16_0))[name = string("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = mul(x = var_3131_cast_fp16, y = sin_cast_fp16)[name = string("op_3132_cast_fp16")]; tensor key_states_43_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3132_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor expand_dims_101 = const()[name = string("expand_dims_101"), val = tensor([10])]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([0])]; tensor expand_dims_104 = const()[name = string("expand_dims_104"), val = tensor([11])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_101, expand_dims_102, position_id, concat_83_values3_0))[name = string("concat_83")]; tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (expand_dims_104, concat_84_values1_0, var_426, concat_84_values3_0))[name = string("concat_84")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_138_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_138")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43_cast_fp16 = transpose(perm = value_states_43_perm_0, x = var_3111_cast_fp16)[name = string("transpose_40")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = value_states_43_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_139_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_139")]; tensor var_3175_begin_0 = const()[name = string("op_3175_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3175_end_0 = const()[name = string("op_3175_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3175_end_mask_0 = const()[name = string("op_3175_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3175_cast_fp16 = slice_by_index(begin = var_3175_begin_0, end = var_3175_end_0, end_mask = var_3175_end_mask_0, x = coreml_update_state_68)[name = string("op_3175_cast_fp16")]; tensor tile_20 = const()[name = string("tile_20"), val = tensor([1, 1])]; int32 var_3178_axis_0 = const()[name = string("op_3178_axis_0"), val = int32(1)]; tensor var_3178_cast_fp16_0, tensor var_3178_cast_fp16_1 = split(axis = var_3178_axis_0, split_sizes = tile_20, x = var_3175_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor var_3185_begin_0 = const()[name = string("op_3185_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3185_end_0 = const()[name = string("op_3185_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3185_end_mask_0 = const()[name = string("op_3185_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = coreml_update_state_69)[name = string("op_3185_cast_fp16")]; tensor tile_21 = const()[name = string("tile_21"), val = tensor([1, 1])]; int32 var_3188_axis_0 = const()[name = string("op_3188_axis_0"), val = int32(1)]; tensor var_3188_cast_fp16_0, tensor var_3188_cast_fp16_1 = split(axis = var_3188_axis_0, split_sizes = tile_21, x = var_3185_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor var_3191_split_sizes_0 = const()[name = string("op_3191_split_sizes_0"), val = tensor([8, 8])]; int32 var_3191_axis_0 = const()[name = string("op_3191_axis_0"), val = int32(1)]; tensor var_3191_cast_fp16_0, tensor var_3191_cast_fp16_1 = split(axis = var_3191_axis_0, split_sizes = var_3191_split_sizes_0, x = query_states_43_cast_fp16)[name = string("op_3191_cast_fp16")]; bool attn_weights_161_transpose_x_0 = const()[name = string("attn_weights_161_transpose_x_0"), val = bool(false)]; bool attn_weights_161_transpose_y_0 = const()[name = string("attn_weights_161_transpose_y_0"), val = bool(false)]; tensor attn_weights_161_cast_fp16 = matmul(transpose_x = attn_weights_161_transpose_x_0, transpose_y = attn_weights_161_transpose_y_0, x = var_3178_cast_fp16_0, y = var_3191_cast_fp16_0)[name = string("attn_weights_161_cast_fp16")]; fp16 _inversed_attn_weights_163_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_163_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_163_cast_fp16 = mul(x = attn_weights_161_cast_fp16, y = _inversed_attn_weights_163_y_0_to_fp16)[name = string("_inversed_attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = _inversed_attn_weights_163_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_165_cast_fp16")]; int32 var_3198 = const()[name = string("op_3198"), val = int32(2)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_3198, x = attn_weights_165_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool var_3204_transpose_x_1 = const()[name = string("op_3204_transpose_x_1"), val = bool(true)]; bool var_3204_transpose_y_1 = const()[name = string("op_3204_transpose_y_1"), val = bool(false)]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_1, transpose_y = var_3204_transpose_y_1, x = attn_weights_167_cast_fp16, y = var_3188_cast_fp16_0)[name = string("op_3204_cast_fp16")]; bool attn_weights_169_transpose_x_0 = const()[name = string("attn_weights_169_transpose_x_0"), val = bool(false)]; bool attn_weights_169_transpose_y_0 = const()[name = string("attn_weights_169_transpose_y_0"), val = bool(false)]; tensor attn_weights_169_cast_fp16 = matmul(transpose_x = attn_weights_169_transpose_x_0, transpose_y = attn_weights_169_transpose_y_0, x = var_3178_cast_fp16_1, y = var_3191_cast_fp16_1)[name = string("attn_weights_169_cast_fp16")]; fp16 _inversed_attn_weights_171_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_171_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_171_cast_fp16 = mul(x = attn_weights_169_cast_fp16, y = _inversed_attn_weights_171_y_0_to_fp16)[name = string("_inversed_attn_weights_171_cast_fp16")]; tensor attn_weights_173_cast_fp16 = add(x = _inversed_attn_weights_171_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_173_cast_fp16")]; int32 var_3210 = const()[name = string("op_3210"), val = int32(2)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_3210, x = attn_weights_173_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(true)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_175_cast_fp16, y = var_3188_cast_fp16_1)[name = string("attn_output_61_cast_fp16")]; int32 var_3218 = const()[name = string("op_3218"), val = int32(1)]; bool attn_output_63_interleave_0 = const()[name = string("attn_output_63_interleave_0"), val = bool(false)]; tensor attn_output_63_cast_fp16 = concat(axis = var_3218, interleave = attn_output_63_interleave_0, values = (var_3204_cast_fp16, attn_output_61_cast_fp16))[name = string("attn_output_63_cast_fp16")]; tensor var_3222_perm_0 = const()[name = string("op_3222_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([1, 1024, 1, 128])]; tensor var_3222_cast_fp16 = transpose(perm = var_3222_perm_0, x = attn_output_63_cast_fp16)[name = string("transpose_39")]; tensor x_185_cast_fp16 = reshape(shape = var_3227, x = var_3222_cast_fp16)[name = string("x_185_cast_fp16")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309958848)))]; tensor hidden_states_63_cast_fp16 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = var_3234_to_fp16, x = x_185_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_179_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(1)]; fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3249_cast_fp16 = mul(x = x_187_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_3249_cast_fp16")]; bool x_189_interleave_0 = const()[name = string("x_189_interleave_0"), val = bool(false)]; tensor x_189_cast_fp16 = concat(axis = var_3246, interleave = x_189_interleave_0, values = (x_187_cast_fp16, var_3249_cast_fp16))[name = string("x_189_cast_fp16")]; tensor out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor([1])]; fp16 var_3259_to_fp16 = const()[name = string("op_3259_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_3259_to_fp16, x = x_189_cast_fp16)[name = string("out_127_cast_fp16")]; tensor layer_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312056064)))]; tensor out_129_cast_fp16 = mul(x = out_127_cast_fp16, y = layer_layers_10_post_attention_layernorm_weight_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_3265_split_sizes_0 = const()[name = string("op_3265_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3265_axis_0 = const()[name = string("op_3265_axis_0"), val = int32(1)]; tensor var_3265_cast_fp16_0, tensor var_3265_cast_fp16_1 = split(axis = var_3265_axis_0, split_sizes = var_3265_split_sizes_0, x = out_129_cast_fp16)[name = string("op_3265_cast_fp16")]; string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312060224)))]; tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = var_3270_to_fp16, x = var_3265_cast_fp16_0)[name = string("input_21_cast_fp16")]; tensor var_3281_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_3281_cast_fp16")]; string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")]; tensor var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor([1, 1])]; tensor var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor([1, 1])]; int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)]; tensor var_3269_to_fp16 = const()[name = string("op_3269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320448896)))]; tensor var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = var_3269_to_fp16, x = var_3265_cast_fp16_0)[name = string("op_3286_cast_fp16")]; tensor x_195_cast_fp16 = mul(x = var_3281_cast_fp16, y = var_3286_cast_fp16)[name = string("x_195_cast_fp16")]; string hidden_states_65_pad_type_0 = const()[name = string("hidden_states_65_pad_type_0"), val = string("valid")]; tensor hidden_states_65_strides_0 = const()[name = string("hidden_states_65_strides_0"), val = tensor([1, 1])]; tensor hidden_states_65_pad_0 = const()[name = string("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_65_dilations_0 = const()[name = string("hidden_states_65_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_65_groups_0 = const()[name = string("hidden_states_65_groups_0"), val = int32(1)]; tensor var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328837568)))]; tensor hidden_states_65_cast_fp16 = conv(dilations = hidden_states_65_dilations_0, groups = hidden_states_65_groups_0, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = hidden_states_65_strides_0, weight = var_3268_to_fp16, x = x_195_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor x_197_cast_fp16 = add(x = x_187_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("x_197_cast_fp16")]; int32 var_3299 = const()[name = string("op_3299"), val = int32(1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3302_cast_fp16 = mul(x = x_197_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3302_cast_fp16")]; bool x_199_interleave_0 = const()[name = string("x_199_interleave_0"), val = bool(false)]; tensor x_199_cast_fp16 = concat(axis = var_3299, interleave = x_199_interleave_0, values = (x_197_cast_fp16, var_3302_cast_fp16))[name = string("x_199_cast_fp16")]; tensor out_133_axes_0 = const()[name = string("out_133_axes_0"), val = tensor([1])]; fp16 var_3312_to_fp16 = const()[name = string("op_3312_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_3312_to_fp16, x = x_199_cast_fp16)[name = string("out_133_cast_fp16")]; tensor layer_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337226240)))]; tensor out_135_cast_fp16 = mul(x = out_133_cast_fp16, y = layer_layers_11_input_layernorm_weight_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_3318_split_sizes_0 = const()[name = string("op_3318_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3318_axis_0 = const()[name = string("op_3318_axis_0"), val = int32(1)]; tensor var_3318_cast_fp16_0, tensor var_3318_cast_fp16_1 = split(axis = var_3318_axis_0, split_sizes = var_3318_split_sizes_0, x = out_135_cast_fp16)[name = string("op_3318_cast_fp16")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337230400)))]; tensor query_states_45_cast_fp16 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = var_3340_to_fp16, x = var_3318_cast_fp16_0)[name = string("query_states_45_cast_fp16")]; string key_states_45_pad_type_0 = const()[name = string("key_states_45_pad_type_0"), val = string("valid")]; tensor key_states_45_strides_0 = const()[name = string("key_states_45_strides_0"), val = tensor([1, 1])]; tensor key_states_45_pad_0 = const()[name = string("key_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_45_dilations_0 = const()[name = string("key_states_45_dilations_0"), val = tensor([1, 1])]; int32 key_states_45_groups_0 = const()[name = string("key_states_45_groups_0"), val = int32(1)]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339327616)))]; tensor key_states_45_cast_fp16 = conv(dilations = key_states_45_dilations_0, groups = key_states_45_groups_0, pad = key_states_45_pad_0, pad_type = key_states_45_pad_type_0, strides = key_states_45_strides_0, weight = var_3351_to_fp16, x = var_3318_cast_fp16_0)[name = string("key_states_45_cast_fp16")]; string value_states_45_pad_type_0 = const()[name = string("value_states_45_pad_type_0"), val = string("valid")]; tensor value_states_45_strides_0 = const()[name = string("value_states_45_strides_0"), val = tensor([1, 1])]; tensor value_states_45_pad_0 = const()[name = string("value_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_45_dilations_0 = const()[name = string("value_states_45_dilations_0"), val = tensor([1, 1])]; int32 value_states_45_groups_0 = const()[name = string("value_states_45_groups_0"), val = int32(1)]; tensor var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339589824)))]; tensor value_states_45_cast_fp16 = conv(dilations = value_states_45_dilations_0, groups = value_states_45_groups_0, pad = value_states_45_pad_0, pad_type = value_states_45_pad_type_0, strides = value_states_45_strides_0, weight = var_3362_to_fp16, x = var_3318_cast_fp16_0)[name = string("value_states_45_cast_fp16")]; tensor var_3370 = const()[name = string("op_3370"), val = tensor([1, 16, 64, 128])]; tensor embed_45_cast_fp16 = reshape(shape = var_3370, x = query_states_45_cast_fp16)[name = string("embed_45_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 2, 64, 128])]; tensor var_3375_cast_fp16 = reshape(shape = var_3374, x = key_states_45_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor embed_47_perm_0 = const()[name = string("embed_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 64, 128])]; tensor var_3382_cast_fp16 = reshape(shape = var_3381, x = value_states_45_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor value_states_47_perm_0 = const()[name = string("value_states_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3386_cast_fp16 = mul(x = embed_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3386_cast_fp16")]; tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([32, 32])]; int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-2)]; tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = embed_45_cast_fp16)[name = string("op_3387_cast_fp16")]; fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3389_cast_fp16 = mul(x = var_3387_cast_fp16_1, y = const_117_promoted_to_fp16)[name = string("op_3389_cast_fp16")]; int32 var_3391 = const()[name = string("op_3391"), val = int32(-2)]; bool var_3392_interleave_0 = const()[name = string("op_3392_interleave_0"), val = bool(false)]; tensor var_3392_cast_fp16 = concat(axis = var_3391, interleave = var_3392_interleave_0, values = (var_3389_cast_fp16, var_3387_cast_fp16_0))[name = string("op_3392_cast_fp16")]; tensor var_3393_cast_fp16 = mul(x = var_3392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3393_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_3386_cast_fp16, y = var_3393_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor embed_47_cast_fp16 = transpose(perm = embed_47_perm_0, x = var_3375_cast_fp16)[name = string("transpose_38")]; tensor var_3396_cast_fp16 = mul(x = embed_47_cast_fp16, y = cos_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397_split_sizes_0 = const()[name = string("op_3397_split_sizes_0"), val = tensor([32, 32])]; int32 var_3397_axis_0 = const()[name = string("op_3397_axis_0"), val = int32(-1)]; tensor var_3397_cast_fp16_0, tensor var_3397_cast_fp16_1 = split(axis = var_3397_axis_0, split_sizes = var_3397_split_sizes_0, x = embed_47_cast_fp16)[name = string("op_3397_cast_fp16")]; fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3399_cast_fp16 = mul(x = var_3397_cast_fp16_1, y = const_118_promoted_to_fp16)[name = string("op_3399_cast_fp16")]; int32 var_3401 = const()[name = string("op_3401"), val = int32(-1)]; bool var_3402_interleave_0 = const()[name = string("op_3402_interleave_0"), val = bool(false)]; tensor var_3402_cast_fp16 = concat(axis = var_3401, interleave = var_3402_interleave_0, values = (var_3399_cast_fp16, var_3397_cast_fp16_0))[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = mul(x = var_3402_cast_fp16, y = sin_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor key_states_47_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3403_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([11])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([12])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_111, expand_dims_112, position_id, concat_91_values3_0))[name = string("concat_91")]; tensor concat_92_values1_0 = const()[name = string("concat_92_values1_0"), val = tensor([0])]; tensor concat_92_values3_0 = const()[name = string("concat_92_values3_0"), val = tensor([0])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_114, concat_92_values1_0, var_426, concat_92_values3_0))[name = string("concat_92")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = key_states_47_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_140_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_140")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_47_cast_fp16 = transpose(perm = value_states_47_perm_0, x = var_3382_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = value_states_47_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_141_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_141")]; tensor var_3446_begin_0 = const()[name = string("op_3446_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3446_end_0 = const()[name = string("op_3446_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3446_end_mask_0 = const()[name = string("op_3446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3446_cast_fp16 = slice_by_index(begin = var_3446_begin_0, end = var_3446_end_0, end_mask = var_3446_end_mask_0, x = coreml_update_state_70)[name = string("op_3446_cast_fp16")]; tensor tile_22 = const()[name = string("tile_22"), val = tensor([1, 1])]; int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(1)]; tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = tile_22, x = var_3446_cast_fp16)[name = string("op_3449_cast_fp16")]; tensor var_3456_begin_0 = const()[name = string("op_3456_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3456_end_0 = const()[name = string("op_3456_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3456_end_mask_0 = const()[name = string("op_3456_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = coreml_update_state_71)[name = string("op_3456_cast_fp16")]; tensor tile_23 = const()[name = string("tile_23"), val = tensor([1, 1])]; int32 var_3459_axis_0 = const()[name = string("op_3459_axis_0"), val = int32(1)]; tensor var_3459_cast_fp16_0, tensor var_3459_cast_fp16_1 = split(axis = var_3459_axis_0, split_sizes = tile_23, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3462_split_sizes_0 = const()[name = string("op_3462_split_sizes_0"), val = tensor([8, 8])]; int32 var_3462_axis_0 = const()[name = string("op_3462_axis_0"), val = int32(1)]; tensor var_3462_cast_fp16_0, tensor var_3462_cast_fp16_1 = split(axis = var_3462_axis_0, split_sizes = var_3462_split_sizes_0, x = query_states_47_cast_fp16)[name = string("op_3462_cast_fp16")]; bool attn_weights_177_transpose_x_0 = const()[name = string("attn_weights_177_transpose_x_0"), val = bool(false)]; bool attn_weights_177_transpose_y_0 = const()[name = string("attn_weights_177_transpose_y_0"), val = bool(false)]; tensor attn_weights_177_cast_fp16 = matmul(transpose_x = attn_weights_177_transpose_x_0, transpose_y = attn_weights_177_transpose_y_0, x = var_3449_cast_fp16_0, y = var_3462_cast_fp16_0)[name = string("attn_weights_177_cast_fp16")]; fp16 _inversed_attn_weights_179_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_179_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_179_cast_fp16 = mul(x = attn_weights_177_cast_fp16, y = _inversed_attn_weights_179_y_0_to_fp16)[name = string("_inversed_attn_weights_179_cast_fp16")]; tensor attn_weights_181_cast_fp16 = add(x = _inversed_attn_weights_179_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_181_cast_fp16")]; int32 var_3469 = const()[name = string("op_3469"), val = int32(2)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_3469, x = attn_weights_181_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool var_3475_transpose_x_1 = const()[name = string("op_3475_transpose_x_1"), val = bool(true)]; bool var_3475_transpose_y_1 = const()[name = string("op_3475_transpose_y_1"), val = bool(false)]; tensor var_3475_cast_fp16 = matmul(transpose_x = var_3475_transpose_x_1, transpose_y = var_3475_transpose_y_1, x = attn_weights_183_cast_fp16, y = var_3459_cast_fp16_0)[name = string("op_3475_cast_fp16")]; bool attn_weights_185_transpose_x_0 = const()[name = string("attn_weights_185_transpose_x_0"), val = bool(false)]; bool attn_weights_185_transpose_y_0 = const()[name = string("attn_weights_185_transpose_y_0"), val = bool(false)]; tensor attn_weights_185_cast_fp16 = matmul(transpose_x = attn_weights_185_transpose_x_0, transpose_y = attn_weights_185_transpose_y_0, x = var_3449_cast_fp16_1, y = var_3462_cast_fp16_1)[name = string("attn_weights_185_cast_fp16")]; fp16 _inversed_attn_weights_187_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_187_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_187_cast_fp16 = mul(x = attn_weights_185_cast_fp16, y = _inversed_attn_weights_187_y_0_to_fp16)[name = string("_inversed_attn_weights_187_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = _inversed_attn_weights_187_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_189_cast_fp16")]; int32 var_3481 = const()[name = string("op_3481"), val = int32(2)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_3481, x = attn_weights_189_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_67_transpose_x_1 = const()[name = string("attn_output_67_transpose_x_1"), val = bool(true)]; bool attn_output_67_transpose_y_1 = const()[name = string("attn_output_67_transpose_y_1"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_1, transpose_y = attn_output_67_transpose_y_1, x = attn_weights_191_cast_fp16, y = var_3459_cast_fp16_1)[name = string("attn_output_67_cast_fp16")]; int32 var_3489 = const()[name = string("op_3489"), val = int32(1)]; bool attn_output_69_interleave_0 = const()[name = string("attn_output_69_interleave_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = concat(axis = var_3489, interleave = attn_output_69_interleave_0, values = (var_3475_cast_fp16, attn_output_67_cast_fp16))[name = string("attn_output_69_cast_fp16")]; tensor var_3493_perm_0 = const()[name = string("op_3493_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, 1024, 1, 128])]; tensor var_3493_cast_fp16 = transpose(perm = var_3493_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_36")]; tensor x_203_cast_fp16 = reshape(shape = var_3498, x = var_3493_cast_fp16)[name = string("x_203_cast_fp16")]; string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339852032)))]; tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = var_3505_to_fp16, x = x_203_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor x_205_cast_fp16 = add(x = x_197_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("x_205_cast_fp16")]; int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; fp16 const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3520_cast_fp16 = mul(x = x_205_cast_fp16, y = const_123_promoted_to_fp16)[name = string("op_3520_cast_fp16")]; bool x_207_interleave_0 = const()[name = string("x_207_interleave_0"), val = bool(false)]; tensor x_207_cast_fp16 = concat(axis = var_3517, interleave = x_207_interleave_0, values = (x_205_cast_fp16, var_3520_cast_fp16))[name = string("x_207_cast_fp16")]; tensor out_139_axes_0 = const()[name = string("out_139_axes_0"), val = tensor([1])]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_3530_to_fp16, x = x_207_cast_fp16)[name = string("out_139_cast_fp16")]; tensor layer_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341949248)))]; tensor out_141_cast_fp16 = mul(x = out_139_cast_fp16, y = layer_layers_11_post_attention_layernorm_weight_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_3536_split_sizes_0 = const()[name = string("op_3536_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3536_axis_0 = const()[name = string("op_3536_axis_0"), val = int32(1)]; tensor var_3536_cast_fp16_0, tensor var_3536_cast_fp16_1 = split(axis = var_3536_axis_0, split_sizes = var_3536_split_sizes_0, x = out_141_cast_fp16)[name = string("op_3536_cast_fp16")]; string input_23_pad_type_0 = const()[name = string("input_23_pad_type_0"), val = string("valid")]; tensor input_23_strides_0 = const()[name = string("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = string("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = string("input_23_dilations_0"), val = tensor([1, 1])]; int32 input_23_groups_0 = const()[name = string("input_23_groups_0"), val = int32(1)]; tensor var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341953408)))]; tensor input_23_cast_fp16 = conv(dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = var_3541_to_fp16, x = var_3536_cast_fp16_0)[name = string("input_23_cast_fp16")]; tensor var_3552_cast_fp16 = silu(x = input_23_cast_fp16)[name = string("op_3552_cast_fp16")]; string var_3557_pad_type_0 = const()[name = string("op_3557_pad_type_0"), val = string("valid")]; tensor var_3557_strides_0 = const()[name = string("op_3557_strides_0"), val = tensor([1, 1])]; tensor var_3557_pad_0 = const()[name = string("op_3557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_dilations_0 = const()[name = string("op_3557_dilations_0"), val = tensor([1, 1])]; int32 var_3557_groups_0 = const()[name = string("op_3557_groups_0"), val = int32(1)]; tensor var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350342080)))]; tensor var_3557_cast_fp16 = conv(dilations = var_3557_dilations_0, groups = var_3557_groups_0, pad = var_3557_pad_0, pad_type = var_3557_pad_type_0, strides = var_3557_strides_0, weight = var_3540_to_fp16, x = var_3536_cast_fp16_0)[name = string("op_3557_cast_fp16")]; tensor x_213_cast_fp16 = mul(x = var_3552_cast_fp16, y = var_3557_cast_fp16)[name = string("x_213_cast_fp16")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor var_3539_to_fp16 = const()[name = string("op_3539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358730752)))]; tensor hidden_states_71_cast_fp16 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = var_3539_to_fp16, x = x_213_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor x_215_cast_fp16 = add(x = x_205_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("x_215_cast_fp16")]; int32 var_3570 = const()[name = string("op_3570"), val = int32(1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3573_cast_fp16 = mul(x = x_215_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_3573_cast_fp16")]; bool x_217_interleave_0 = const()[name = string("x_217_interleave_0"), val = bool(false)]; tensor x_217_cast_fp16 = concat(axis = var_3570, interleave = x_217_interleave_0, values = (x_215_cast_fp16, var_3573_cast_fp16))[name = string("x_217_cast_fp16")]; tensor out_145_axes_0 = const()[name = string("out_145_axes_0"), val = tensor([1])]; fp16 var_3583_to_fp16 = const()[name = string("op_3583_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_145_cast_fp16 = layer_norm(axes = out_145_axes_0, epsilon = var_3583_to_fp16, x = x_217_cast_fp16)[name = string("out_145_cast_fp16")]; tensor layer_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367119424)))]; tensor out_147_cast_fp16 = mul(x = out_145_cast_fp16, y = layer_layers_12_input_layernorm_weight_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_3589_split_sizes_0 = const()[name = string("op_3589_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3589_axis_0 = const()[name = string("op_3589_axis_0"), val = int32(1)]; tensor var_3589_cast_fp16_0, tensor var_3589_cast_fp16_1 = split(axis = var_3589_axis_0, split_sizes = var_3589_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3589_cast_fp16")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor var_3611_to_fp16 = const()[name = string("op_3611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367123584)))]; tensor query_states_49_cast_fp16 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = var_3611_to_fp16, x = var_3589_cast_fp16_0)[name = string("query_states_49_cast_fp16")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor var_3622_to_fp16 = const()[name = string("op_3622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369220800)))]; tensor key_states_49_cast_fp16 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = var_3622_to_fp16, x = var_3589_cast_fp16_0)[name = string("key_states_49_cast_fp16")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor var_3633_to_fp16 = const()[name = string("op_3633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369483008)))]; tensor value_states_49_cast_fp16 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = var_3633_to_fp16, x = var_3589_cast_fp16_0)[name = string("value_states_49_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, 16, 64, 128])]; tensor embed_49_cast_fp16 = reshape(shape = var_3641, x = query_states_49_cast_fp16)[name = string("embed_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 2, 64, 128])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = key_states_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor embed_51_perm_0 = const()[name = string("embed_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3652 = const()[name = string("op_3652"), val = tensor([1, 2, 64, 128])]; tensor var_3653_cast_fp16 = reshape(shape = var_3652, x = value_states_49_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor value_states_51_perm_0 = const()[name = string("value_states_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3657_cast_fp16 = mul(x = embed_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3657_cast_fp16")]; tensor var_3658_split_sizes_0 = const()[name = string("op_3658_split_sizes_0"), val = tensor([32, 32])]; int32 var_3658_axis_0 = const()[name = string("op_3658_axis_0"), val = int32(-2)]; tensor var_3658_cast_fp16_0, tensor var_3658_cast_fp16_1 = split(axis = var_3658_axis_0, split_sizes = var_3658_split_sizes_0, x = embed_49_cast_fp16)[name = string("op_3658_cast_fp16")]; fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3660_cast_fp16 = mul(x = var_3658_cast_fp16_1, y = const_127_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; int32 var_3662 = const()[name = string("op_3662"), val = int32(-2)]; bool var_3663_interleave_0 = const()[name = string("op_3663_interleave_0"), val = bool(false)]; tensor var_3663_cast_fp16 = concat(axis = var_3662, interleave = var_3663_interleave_0, values = (var_3660_cast_fp16, var_3658_cast_fp16_0))[name = string("op_3663_cast_fp16")]; tensor var_3664_cast_fp16 = mul(x = var_3663_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_3657_cast_fp16, y = var_3664_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor embed_51_cast_fp16 = transpose(perm = embed_51_perm_0, x = var_3646_cast_fp16)[name = string("transpose_35")]; tensor var_3667_cast_fp16 = mul(x = embed_51_cast_fp16, y = cos_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor var_3668_split_sizes_0 = const()[name = string("op_3668_split_sizes_0"), val = tensor([32, 32])]; int32 var_3668_axis_0 = const()[name = string("op_3668_axis_0"), val = int32(-1)]; tensor var_3668_cast_fp16_0, tensor var_3668_cast_fp16_1 = split(axis = var_3668_axis_0, split_sizes = var_3668_split_sizes_0, x = embed_51_cast_fp16)[name = string("op_3668_cast_fp16")]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3670_cast_fp16 = mul(x = var_3668_cast_fp16_1, y = const_128_promoted_to_fp16)[name = string("op_3670_cast_fp16")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; bool var_3673_interleave_0 = const()[name = string("op_3673_interleave_0"), val = bool(false)]; tensor var_3673_cast_fp16 = concat(axis = var_3672, interleave = var_3673_interleave_0, values = (var_3670_cast_fp16, var_3668_cast_fp16_0))[name = string("op_3673_cast_fp16")]; tensor var_3674_cast_fp16 = mul(x = var_3673_cast_fp16, y = sin_cast_fp16)[name = string("op_3674_cast_fp16")]; tensor key_states_51_cast_fp16 = add(x = var_3667_cast_fp16, y = var_3674_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([12])]; tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([13])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_121, expand_dims_122, position_id, concat_99_values3_0))[name = string("concat_99")]; tensor concat_100_values1_0 = const()[name = string("concat_100_values1_0"), val = tensor([0])]; tensor concat_100_values3_0 = const()[name = string("concat_100_values3_0"), val = tensor([0])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (expand_dims_124, concat_100_values1_0, var_426, concat_100_values3_0))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = key_states_51_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_142_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_142")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51_cast_fp16 = transpose(perm = value_states_51_perm_0, x = var_3653_cast_fp16)[name = string("transpose_34")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = value_states_51_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_143_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_143")]; tensor var_3717_begin_0 = const()[name = string("op_3717_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3717_end_0 = const()[name = string("op_3717_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3717_end_mask_0 = const()[name = string("op_3717_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = coreml_update_state_72)[name = string("op_3717_cast_fp16")]; tensor tile_24 = const()[name = string("tile_24"), val = tensor([1, 1])]; int32 var_3720_axis_0 = const()[name = string("op_3720_axis_0"), val = int32(1)]; tensor var_3720_cast_fp16_0, tensor var_3720_cast_fp16_1 = split(axis = var_3720_axis_0, split_sizes = tile_24, x = var_3717_cast_fp16)[name = string("op_3720_cast_fp16")]; tensor var_3727_begin_0 = const()[name = string("op_3727_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3727_end_0 = const()[name = string("op_3727_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3727_end_mask_0 = const()[name = string("op_3727_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = coreml_update_state_73)[name = string("op_3727_cast_fp16")]; tensor tile_25 = const()[name = string("tile_25"), val = tensor([1, 1])]; int32 var_3730_axis_0 = const()[name = string("op_3730_axis_0"), val = int32(1)]; tensor var_3730_cast_fp16_0, tensor var_3730_cast_fp16_1 = split(axis = var_3730_axis_0, split_sizes = tile_25, x = var_3727_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor var_3733_split_sizes_0 = const()[name = string("op_3733_split_sizes_0"), val = tensor([8, 8])]; int32 var_3733_axis_0 = const()[name = string("op_3733_axis_0"), val = int32(1)]; tensor var_3733_cast_fp16_0, tensor var_3733_cast_fp16_1 = split(axis = var_3733_axis_0, split_sizes = var_3733_split_sizes_0, x = query_states_51_cast_fp16)[name = string("op_3733_cast_fp16")]; bool attn_weights_193_transpose_x_0 = const()[name = string("attn_weights_193_transpose_x_0"), val = bool(false)]; bool attn_weights_193_transpose_y_0 = const()[name = string("attn_weights_193_transpose_y_0"), val = bool(false)]; tensor attn_weights_193_cast_fp16 = matmul(transpose_x = attn_weights_193_transpose_x_0, transpose_y = attn_weights_193_transpose_y_0, x = var_3720_cast_fp16_0, y = var_3733_cast_fp16_0)[name = string("attn_weights_193_cast_fp16")]; fp16 _inversed_attn_weights_195_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_195_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_195_cast_fp16 = mul(x = attn_weights_193_cast_fp16, y = _inversed_attn_weights_195_y_0_to_fp16)[name = string("_inversed_attn_weights_195_cast_fp16")]; tensor attn_weights_197_cast_fp16 = add(x = _inversed_attn_weights_195_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_197_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(2)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_3740, x = attn_weights_197_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool var_3746_transpose_x_1 = const()[name = string("op_3746_transpose_x_1"), val = bool(true)]; bool var_3746_transpose_y_1 = const()[name = string("op_3746_transpose_y_1"), val = bool(false)]; tensor var_3746_cast_fp16 = matmul(transpose_x = var_3746_transpose_x_1, transpose_y = var_3746_transpose_y_1, x = attn_weights_199_cast_fp16, y = var_3730_cast_fp16_0)[name = string("op_3746_cast_fp16")]; bool attn_weights_201_transpose_x_0 = const()[name = string("attn_weights_201_transpose_x_0"), val = bool(false)]; bool attn_weights_201_transpose_y_0 = const()[name = string("attn_weights_201_transpose_y_0"), val = bool(false)]; tensor attn_weights_201_cast_fp16 = matmul(transpose_x = attn_weights_201_transpose_x_0, transpose_y = attn_weights_201_transpose_y_0, x = var_3720_cast_fp16_1, y = var_3733_cast_fp16_1)[name = string("attn_weights_201_cast_fp16")]; fp16 _inversed_attn_weights_203_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_203_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_203_cast_fp16 = mul(x = attn_weights_201_cast_fp16, y = _inversed_attn_weights_203_y_0_to_fp16)[name = string("_inversed_attn_weights_203_cast_fp16")]; tensor attn_weights_205_cast_fp16 = add(x = _inversed_attn_weights_203_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_205_cast_fp16")]; int32 var_3752 = const()[name = string("op_3752"), val = int32(2)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_3752, x = attn_weights_205_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(true)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_207_cast_fp16, y = var_3730_cast_fp16_1)[name = string("attn_output_73_cast_fp16")]; int32 var_3760 = const()[name = string("op_3760"), val = int32(1)]; bool attn_output_75_interleave_0 = const()[name = string("attn_output_75_interleave_0"), val = bool(false)]; tensor attn_output_75_cast_fp16 = concat(axis = var_3760, interleave = attn_output_75_interleave_0, values = (var_3746_cast_fp16, attn_output_73_cast_fp16))[name = string("attn_output_75_cast_fp16")]; tensor var_3764_perm_0 = const()[name = string("op_3764_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([1, 1024, 1, 128])]; tensor var_3764_cast_fp16 = transpose(perm = var_3764_perm_0, x = attn_output_75_cast_fp16)[name = string("transpose_33")]; tensor x_221_cast_fp16 = reshape(shape = var_3769, x = var_3764_cast_fp16)[name = string("x_221_cast_fp16")]; string hidden_states_75_pad_type_0 = const()[name = string("hidden_states_75_pad_type_0"), val = string("valid")]; tensor hidden_states_75_strides_0 = const()[name = string("hidden_states_75_strides_0"), val = tensor([1, 1])]; tensor hidden_states_75_pad_0 = const()[name = string("hidden_states_75_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_75_dilations_0 = const()[name = string("hidden_states_75_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_75_groups_0 = const()[name = string("hidden_states_75_groups_0"), val = int32(1)]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369745216)))]; tensor hidden_states_75_cast_fp16 = conv(dilations = hidden_states_75_dilations_0, groups = hidden_states_75_groups_0, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = hidden_states_75_strides_0, weight = var_3776_to_fp16, x = x_221_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor x_223_cast_fp16 = add(x = x_215_cast_fp16, y = hidden_states_75_cast_fp16)[name = string("x_223_cast_fp16")]; int32 var_3788 = const()[name = string("op_3788"), val = int32(1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3791_cast_fp16 = mul(x = x_223_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3791_cast_fp16")]; bool x_225_interleave_0 = const()[name = string("x_225_interleave_0"), val = bool(false)]; tensor x_225_cast_fp16 = concat(axis = var_3788, interleave = x_225_interleave_0, values = (x_223_cast_fp16, var_3791_cast_fp16))[name = string("x_225_cast_fp16")]; tensor out_151_axes_0 = const()[name = string("out_151_axes_0"), val = tensor([1])]; fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_151_cast_fp16 = layer_norm(axes = out_151_axes_0, epsilon = var_3801_to_fp16, x = x_225_cast_fp16)[name = string("out_151_cast_fp16")]; tensor layer_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371842432)))]; tensor out_153_cast_fp16 = mul(x = out_151_cast_fp16, y = layer_layers_12_post_attention_layernorm_weight_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3807_split_sizes_0 = const()[name = string("op_3807_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3807_axis_0 = const()[name = string("op_3807_axis_0"), val = int32(1)]; tensor var_3807_cast_fp16_0, tensor var_3807_cast_fp16_1 = split(axis = var_3807_axis_0, split_sizes = var_3807_split_sizes_0, x = out_153_cast_fp16)[name = string("op_3807_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371846592)))]; tensor input_25_cast_fp16 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = var_3812_to_fp16, x = var_3807_cast_fp16_0)[name = string("input_25_cast_fp16")]; tensor var_3823_cast_fp16 = silu(x = input_25_cast_fp16)[name = string("op_3823_cast_fp16")]; string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")]; tensor var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor([1, 1])]; tensor var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor([1, 1])]; int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)]; tensor var_3811_to_fp16 = const()[name = string("op_3811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380235264)))]; tensor var_3828_cast_fp16 = conv(dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = var_3811_to_fp16, x = var_3807_cast_fp16_0)[name = string("op_3828_cast_fp16")]; tensor x_231_cast_fp16 = mul(x = var_3823_cast_fp16, y = var_3828_cast_fp16)[name = string("x_231_cast_fp16")]; string hidden_states_77_pad_type_0 = const()[name = string("hidden_states_77_pad_type_0"), val = string("valid")]; tensor hidden_states_77_strides_0 = const()[name = string("hidden_states_77_strides_0"), val = tensor([1, 1])]; tensor hidden_states_77_pad_0 = const()[name = string("hidden_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_77_dilations_0 = const()[name = string("hidden_states_77_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_77_groups_0 = const()[name = string("hidden_states_77_groups_0"), val = int32(1)]; tensor var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388623936)))]; tensor hidden_states_77_cast_fp16 = conv(dilations = hidden_states_77_dilations_0, groups = hidden_states_77_groups_0, pad = hidden_states_77_pad_0, pad_type = hidden_states_77_pad_type_0, strides = hidden_states_77_strides_0, weight = var_3810_to_fp16, x = x_231_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_223_cast_fp16, y = hidden_states_77_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_3841 = const()[name = string("op_3841"), val = int32(1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x_233_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool x_235_interleave_0 = const()[name = string("x_235_interleave_0"), val = bool(false)]; tensor x_235_cast_fp16 = concat(axis = var_3841, interleave = x_235_interleave_0, values = (x_233_cast_fp16, var_3844_cast_fp16))[name = string("x_235_cast_fp16")]; tensor out_157_axes_0 = const()[name = string("out_157_axes_0"), val = tensor([1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_157_cast_fp16 = layer_norm(axes = out_157_axes_0, epsilon = var_3854_to_fp16, x = x_235_cast_fp16)[name = string("out_157_cast_fp16")]; tensor layer_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397012608)))]; tensor out_159_cast_fp16 = mul(x = out_157_cast_fp16, y = layer_layers_13_input_layernorm_weight_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_3860_split_sizes_0 = const()[name = string("op_3860_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3860_axis_0 = const()[name = string("op_3860_axis_0"), val = int32(1)]; tensor var_3860_cast_fp16_0, tensor var_3860_cast_fp16_1 = split(axis = var_3860_axis_0, split_sizes = var_3860_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3860_cast_fp16")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397016768)))]; tensor query_states_53_cast_fp16 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = var_3882_to_fp16, x = var_3860_cast_fp16_0)[name = string("query_states_53_cast_fp16")]; string key_states_53_pad_type_0 = const()[name = string("key_states_53_pad_type_0"), val = string("valid")]; tensor key_states_53_strides_0 = const()[name = string("key_states_53_strides_0"), val = tensor([1, 1])]; tensor key_states_53_pad_0 = const()[name = string("key_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_53_dilations_0 = const()[name = string("key_states_53_dilations_0"), val = tensor([1, 1])]; int32 key_states_53_groups_0 = const()[name = string("key_states_53_groups_0"), val = int32(1)]; tensor var_3893_to_fp16 = const()[name = string("op_3893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399113984)))]; tensor key_states_53_cast_fp16 = conv(dilations = key_states_53_dilations_0, groups = key_states_53_groups_0, pad = key_states_53_pad_0, pad_type = key_states_53_pad_type_0, strides = key_states_53_strides_0, weight = var_3893_to_fp16, x = var_3860_cast_fp16_0)[name = string("key_states_53_cast_fp16")]; string value_states_53_pad_type_0 = const()[name = string("value_states_53_pad_type_0"), val = string("valid")]; tensor value_states_53_strides_0 = const()[name = string("value_states_53_strides_0"), val = tensor([1, 1])]; tensor value_states_53_pad_0 = const()[name = string("value_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_53_dilations_0 = const()[name = string("value_states_53_dilations_0"), val = tensor([1, 1])]; int32 value_states_53_groups_0 = const()[name = string("value_states_53_groups_0"), val = int32(1)]; tensor var_3904_to_fp16 = const()[name = string("op_3904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399376192)))]; tensor value_states_53_cast_fp16 = conv(dilations = value_states_53_dilations_0, groups = value_states_53_groups_0, pad = value_states_53_pad_0, pad_type = value_states_53_pad_type_0, strides = value_states_53_strides_0, weight = var_3904_to_fp16, x = var_3860_cast_fp16_0)[name = string("value_states_53_cast_fp16")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([1, 16, 64, 128])]; tensor embed_53_cast_fp16 = reshape(shape = var_3912, x = query_states_53_cast_fp16)[name = string("embed_53_cast_fp16")]; tensor var_3916 = const()[name = string("op_3916"), val = tensor([1, 2, 64, 128])]; tensor var_3917_cast_fp16 = reshape(shape = var_3916, x = key_states_53_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor embed_55_perm_0 = const()[name = string("embed_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2, 64, 128])]; tensor var_3924_cast_fp16 = reshape(shape = var_3923, x = value_states_53_cast_fp16)[name = string("op_3924_cast_fp16")]; tensor value_states_55_perm_0 = const()[name = string("value_states_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3928_cast_fp16 = mul(x = embed_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor var_3929_split_sizes_0 = const()[name = string("op_3929_split_sizes_0"), val = tensor([32, 32])]; int32 var_3929_axis_0 = const()[name = string("op_3929_axis_0"), val = int32(-2)]; tensor var_3929_cast_fp16_0, tensor var_3929_cast_fp16_1 = split(axis = var_3929_axis_0, split_sizes = var_3929_split_sizes_0, x = embed_53_cast_fp16)[name = string("op_3929_cast_fp16")]; fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3931_cast_fp16 = mul(x = var_3929_cast_fp16_1, y = const_137_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; int32 var_3933 = const()[name = string("op_3933"), val = int32(-2)]; bool var_3934_interleave_0 = const()[name = string("op_3934_interleave_0"), val = bool(false)]; tensor var_3934_cast_fp16 = concat(axis = var_3933, interleave = var_3934_interleave_0, values = (var_3931_cast_fp16, var_3929_cast_fp16_0))[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = var_3934_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_3928_cast_fp16, y = var_3935_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor embed_55_cast_fp16 = transpose(perm = embed_55_perm_0, x = var_3917_cast_fp16)[name = string("transpose_32")]; tensor var_3938_cast_fp16 = mul(x = embed_55_cast_fp16, y = cos_cast_fp16)[name = string("op_3938_cast_fp16")]; tensor var_3939_split_sizes_0 = const()[name = string("op_3939_split_sizes_0"), val = tensor([32, 32])]; int32 var_3939_axis_0 = const()[name = string("op_3939_axis_0"), val = int32(-1)]; tensor var_3939_cast_fp16_0, tensor var_3939_cast_fp16_1 = split(axis = var_3939_axis_0, split_sizes = var_3939_split_sizes_0, x = embed_55_cast_fp16)[name = string("op_3939_cast_fp16")]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3941_cast_fp16 = mul(x = var_3939_cast_fp16_1, y = const_138_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; bool var_3944_interleave_0 = const()[name = string("op_3944_interleave_0"), val = bool(false)]; tensor var_3944_cast_fp16 = concat(axis = var_3943, interleave = var_3944_interleave_0, values = (var_3941_cast_fp16, var_3939_cast_fp16_0))[name = string("op_3944_cast_fp16")]; tensor var_3945_cast_fp16 = mul(x = var_3944_cast_fp16, y = sin_cast_fp16)[name = string("op_3945_cast_fp16")]; tensor key_states_55_cast_fp16 = add(x = var_3938_cast_fp16, y = var_3945_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor expand_dims_131 = const()[name = string("expand_dims_131"), val = tensor([13])]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_134 = const()[name = string("expand_dims_134"), val = tensor([14])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_131, expand_dims_132, position_id, concat_107_values3_0))[name = string("concat_107")]; tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (expand_dims_134, concat_108_values1_0, var_426, concat_108_values3_0))[name = string("concat_108")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = key_states_55_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_144_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_144")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_55_cast_fp16 = transpose(perm = value_states_55_perm_0, x = var_3924_cast_fp16)[name = string("transpose_31")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = value_states_55_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_145_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_145")]; tensor var_3988_begin_0 = const()[name = string("op_3988_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3988_end_0 = const()[name = string("op_3988_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3988_end_mask_0 = const()[name = string("op_3988_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = coreml_update_state_74)[name = string("op_3988_cast_fp16")]; tensor tile_26 = const()[name = string("tile_26"), val = tensor([1, 1])]; int32 var_3991_axis_0 = const()[name = string("op_3991_axis_0"), val = int32(1)]; tensor var_3991_cast_fp16_0, tensor var_3991_cast_fp16_1 = split(axis = var_3991_axis_0, split_sizes = tile_26, x = var_3988_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor var_3998_begin_0 = const()[name = string("op_3998_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3998_end_0 = const()[name = string("op_3998_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3998_end_mask_0 = const()[name = string("op_3998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3998_cast_fp16 = slice_by_index(begin = var_3998_begin_0, end = var_3998_end_0, end_mask = var_3998_end_mask_0, x = coreml_update_state_75)[name = string("op_3998_cast_fp16")]; tensor tile_27 = const()[name = string("tile_27"), val = tensor([1, 1])]; int32 var_4001_axis_0 = const()[name = string("op_4001_axis_0"), val = int32(1)]; tensor var_4001_cast_fp16_0, tensor var_4001_cast_fp16_1 = split(axis = var_4001_axis_0, split_sizes = tile_27, x = var_3998_cast_fp16)[name = string("op_4001_cast_fp16")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([8, 8])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(1)]; tensor var_4004_cast_fp16_0, tensor var_4004_cast_fp16_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = query_states_55_cast_fp16)[name = string("op_4004_cast_fp16")]; bool attn_weights_209_transpose_x_0 = const()[name = string("attn_weights_209_transpose_x_0"), val = bool(false)]; bool attn_weights_209_transpose_y_0 = const()[name = string("attn_weights_209_transpose_y_0"), val = bool(false)]; tensor attn_weights_209_cast_fp16 = matmul(transpose_x = attn_weights_209_transpose_x_0, transpose_y = attn_weights_209_transpose_y_0, x = var_3991_cast_fp16_0, y = var_4004_cast_fp16_0)[name = string("attn_weights_209_cast_fp16")]; fp16 _inversed_attn_weights_211_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_211_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_211_cast_fp16 = mul(x = attn_weights_209_cast_fp16, y = _inversed_attn_weights_211_y_0_to_fp16)[name = string("_inversed_attn_weights_211_cast_fp16")]; tensor attn_weights_213_cast_fp16 = add(x = _inversed_attn_weights_211_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_213_cast_fp16")]; int32 var_4011 = const()[name = string("op_4011"), val = int32(2)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_4011, x = attn_weights_213_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool var_4017_transpose_x_1 = const()[name = string("op_4017_transpose_x_1"), val = bool(true)]; bool var_4017_transpose_y_1 = const()[name = string("op_4017_transpose_y_1"), val = bool(false)]; tensor var_4017_cast_fp16 = matmul(transpose_x = var_4017_transpose_x_1, transpose_y = var_4017_transpose_y_1, x = attn_weights_215_cast_fp16, y = var_4001_cast_fp16_0)[name = string("op_4017_cast_fp16")]; bool attn_weights_217_transpose_x_0 = const()[name = string("attn_weights_217_transpose_x_0"), val = bool(false)]; bool attn_weights_217_transpose_y_0 = const()[name = string("attn_weights_217_transpose_y_0"), val = bool(false)]; tensor attn_weights_217_cast_fp16 = matmul(transpose_x = attn_weights_217_transpose_x_0, transpose_y = attn_weights_217_transpose_y_0, x = var_3991_cast_fp16_1, y = var_4004_cast_fp16_1)[name = string("attn_weights_217_cast_fp16")]; fp16 _inversed_attn_weights_219_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_219_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_219_cast_fp16 = mul(x = attn_weights_217_cast_fp16, y = _inversed_attn_weights_219_y_0_to_fp16)[name = string("_inversed_attn_weights_219_cast_fp16")]; tensor attn_weights_221_cast_fp16 = add(x = _inversed_attn_weights_219_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_221_cast_fp16")]; int32 var_4023 = const()[name = string("op_4023"), val = int32(2)]; tensor attn_weights_223_cast_fp16 = softmax(axis = var_4023, x = attn_weights_221_cast_fp16)[name = string("attn_weights_223_cast_fp16")]; bool attn_output_79_transpose_x_1 = const()[name = string("attn_output_79_transpose_x_1"), val = bool(true)]; bool attn_output_79_transpose_y_1 = const()[name = string("attn_output_79_transpose_y_1"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_1, transpose_y = attn_output_79_transpose_y_1, x = attn_weights_223_cast_fp16, y = var_4001_cast_fp16_1)[name = string("attn_output_79_cast_fp16")]; int32 var_4031 = const()[name = string("op_4031"), val = int32(1)]; bool attn_output_81_interleave_0 = const()[name = string("attn_output_81_interleave_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = concat(axis = var_4031, interleave = attn_output_81_interleave_0, values = (var_4017_cast_fp16, attn_output_79_cast_fp16))[name = string("attn_output_81_cast_fp16")]; tensor var_4035_perm_0 = const()[name = string("op_4035_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 1024, 1, 128])]; tensor var_4035_cast_fp16 = transpose(perm = var_4035_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_30")]; tensor x_239_cast_fp16 = reshape(shape = var_4040, x = var_4035_cast_fp16)[name = string("x_239_cast_fp16")]; string hidden_states_81_pad_type_0 = const()[name = string("hidden_states_81_pad_type_0"), val = string("valid")]; tensor hidden_states_81_strides_0 = const()[name = string("hidden_states_81_strides_0"), val = tensor([1, 1])]; tensor hidden_states_81_pad_0 = const()[name = string("hidden_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_81_dilations_0 = const()[name = string("hidden_states_81_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_81_groups_0 = const()[name = string("hidden_states_81_groups_0"), val = int32(1)]; tensor var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399638400)))]; tensor hidden_states_81_cast_fp16 = conv(dilations = hidden_states_81_dilations_0, groups = hidden_states_81_groups_0, pad = hidden_states_81_pad_0, pad_type = hidden_states_81_pad_type_0, strides = hidden_states_81_strides_0, weight = var_4047_to_fp16, x = x_239_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_81_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(1)]; fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4062_cast_fp16 = mul(x = x_241_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; bool x_243_interleave_0 = const()[name = string("x_243_interleave_0"), val = bool(false)]; tensor x_243_cast_fp16 = concat(axis = var_4059, interleave = x_243_interleave_0, values = (x_241_cast_fp16, var_4062_cast_fp16))[name = string("x_243_cast_fp16")]; tensor out_163_axes_0 = const()[name = string("out_163_axes_0"), val = tensor([1])]; fp16 var_4072_to_fp16 = const()[name = string("op_4072_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_163_cast_fp16 = layer_norm(axes = out_163_axes_0, epsilon = var_4072_to_fp16, x = x_243_cast_fp16)[name = string("out_163_cast_fp16")]; tensor layer_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401735616)))]; tensor out_165_cast_fp16 = mul(x = out_163_cast_fp16, y = layer_layers_13_post_attention_layernorm_weight_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_4078_split_sizes_0 = const()[name = string("op_4078_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4078_axis_0 = const()[name = string("op_4078_axis_0"), val = int32(1)]; tensor var_4078_cast_fp16_0, tensor var_4078_cast_fp16_1 = split(axis = var_4078_axis_0, split_sizes = var_4078_split_sizes_0, x = out_165_cast_fp16)[name = string("op_4078_cast_fp16")]; string input_27_pad_type_0 = const()[name = string("input_27_pad_type_0"), val = string("valid")]; tensor input_27_strides_0 = const()[name = string("input_27_strides_0"), val = tensor([1, 1])]; tensor input_27_pad_0 = const()[name = string("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_27_dilations_0 = const()[name = string("input_27_dilations_0"), val = tensor([1, 1])]; int32 input_27_groups_0 = const()[name = string("input_27_groups_0"), val = int32(1)]; tensor var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401739776)))]; tensor input_27_cast_fp16 = conv(dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = var_4083_to_fp16, x = var_4078_cast_fp16_0)[name = string("input_27_cast_fp16")]; tensor var_4094_cast_fp16 = silu(x = input_27_cast_fp16)[name = string("op_4094_cast_fp16")]; string var_4099_pad_type_0 = const()[name = string("op_4099_pad_type_0"), val = string("valid")]; tensor var_4099_strides_0 = const()[name = string("op_4099_strides_0"), val = tensor([1, 1])]; tensor var_4099_pad_0 = const()[name = string("op_4099_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4099_dilations_0 = const()[name = string("op_4099_dilations_0"), val = tensor([1, 1])]; int32 var_4099_groups_0 = const()[name = string("op_4099_groups_0"), val = int32(1)]; tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410128448)))]; tensor var_4099_cast_fp16 = conv(dilations = var_4099_dilations_0, groups = var_4099_groups_0, pad = var_4099_pad_0, pad_type = var_4099_pad_type_0, strides = var_4099_strides_0, weight = var_4082_to_fp16, x = var_4078_cast_fp16_0)[name = string("op_4099_cast_fp16")]; tensor x_249_cast_fp16 = mul(x = var_4094_cast_fp16, y = var_4099_cast_fp16)[name = string("x_249_cast_fp16")]; string hidden_states_83_pad_type_0 = const()[name = string("hidden_states_83_pad_type_0"), val = string("valid")]; tensor hidden_states_83_strides_0 = const()[name = string("hidden_states_83_strides_0"), val = tensor([1, 1])]; tensor hidden_states_83_pad_0 = const()[name = string("hidden_states_83_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_83_dilations_0 = const()[name = string("hidden_states_83_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_83_groups_0 = const()[name = string("hidden_states_83_groups_0"), val = int32(1)]; tensor var_4081_to_fp16 = const()[name = string("op_4081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418517120)))]; tensor hidden_states_83_cast_fp16 = conv(dilations = hidden_states_83_dilations_0, groups = hidden_states_83_groups_0, pad = hidden_states_83_pad_0, pad_type = hidden_states_83_pad_type_0, strides = hidden_states_83_strides_0, weight = var_4081_to_fp16, x = x_249_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_4112 = const()[name = string("op_4112"), val = int32(1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4115_cast_fp16 = mul(x = x_251_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4115_cast_fp16")]; bool x_253_interleave_0 = const()[name = string("x_253_interleave_0"), val = bool(false)]; tensor x_253_cast_fp16 = concat(axis = var_4112, interleave = x_253_interleave_0, values = (x_251_cast_fp16, var_4115_cast_fp16))[name = string("x_253_cast_fp16")]; tensor out_169_axes_0 = const()[name = string("out_169_axes_0"), val = tensor([1])]; fp16 var_4125_to_fp16 = const()[name = string("op_4125_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_169_cast_fp16 = layer_norm(axes = out_169_axes_0, epsilon = var_4125_to_fp16, x = x_253_cast_fp16)[name = string("out_169_cast_fp16")]; tensor layer_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426905792)))]; tensor out_171_cast_fp16 = mul(x = out_169_cast_fp16, y = layer_layers_14_input_layernorm_weight_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_4131_split_sizes_0 = const()[name = string("op_4131_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4131_axis_0 = const()[name = string("op_4131_axis_0"), val = int32(1)]; tensor var_4131_cast_fp16_0, tensor var_4131_cast_fp16_1 = split(axis = var_4131_axis_0, split_sizes = var_4131_split_sizes_0, x = out_171_cast_fp16)[name = string("op_4131_cast_fp16")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426909952)))]; tensor query_states_57_cast_fp16 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = var_4153_to_fp16, x = var_4131_cast_fp16_0)[name = string("query_states_57_cast_fp16")]; string key_states_57_pad_type_0 = const()[name = string("key_states_57_pad_type_0"), val = string("valid")]; tensor key_states_57_strides_0 = const()[name = string("key_states_57_strides_0"), val = tensor([1, 1])]; tensor key_states_57_pad_0 = const()[name = string("key_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_57_dilations_0 = const()[name = string("key_states_57_dilations_0"), val = tensor([1, 1])]; int32 key_states_57_groups_0 = const()[name = string("key_states_57_groups_0"), val = int32(1)]; tensor var_4164_to_fp16 = const()[name = string("op_4164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429007168)))]; tensor key_states_57_cast_fp16 = conv(dilations = key_states_57_dilations_0, groups = key_states_57_groups_0, pad = key_states_57_pad_0, pad_type = key_states_57_pad_type_0, strides = key_states_57_strides_0, weight = var_4164_to_fp16, x = var_4131_cast_fp16_0)[name = string("key_states_57_cast_fp16")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor var_4175_to_fp16 = const()[name = string("op_4175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429269376)))]; tensor value_states_57_cast_fp16 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = var_4175_to_fp16, x = var_4131_cast_fp16_0)[name = string("value_states_57_cast_fp16")]; tensor var_4183 = const()[name = string("op_4183"), val = tensor([1, 16, 64, 128])]; tensor embed_57_cast_fp16 = reshape(shape = var_4183, x = query_states_57_cast_fp16)[name = string("embed_57_cast_fp16")]; tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 2, 64, 128])]; tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = key_states_57_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor embed_59_perm_0 = const()[name = string("embed_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4194 = const()[name = string("op_4194"), val = tensor([1, 2, 64, 128])]; tensor var_4195_cast_fp16 = reshape(shape = var_4194, x = value_states_57_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor value_states_59_perm_0 = const()[name = string("value_states_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4199_cast_fp16 = mul(x = embed_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor var_4200_split_sizes_0 = const()[name = string("op_4200_split_sizes_0"), val = tensor([32, 32])]; int32 var_4200_axis_0 = const()[name = string("op_4200_axis_0"), val = int32(-2)]; tensor var_4200_cast_fp16_0, tensor var_4200_cast_fp16_1 = split(axis = var_4200_axis_0, split_sizes = var_4200_split_sizes_0, x = embed_57_cast_fp16)[name = string("op_4200_cast_fp16")]; fp16 const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4202_cast_fp16 = mul(x = var_4200_cast_fp16_1, y = const_147_promoted_to_fp16)[name = string("op_4202_cast_fp16")]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-2)]; bool var_4205_interleave_0 = const()[name = string("op_4205_interleave_0"), val = bool(false)]; tensor var_4205_cast_fp16 = concat(axis = var_4204, interleave = var_4205_interleave_0, values = (var_4202_cast_fp16, var_4200_cast_fp16_0))[name = string("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = mul(x = var_4205_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4206_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4206_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor embed_59_cast_fp16 = transpose(perm = embed_59_perm_0, x = var_4188_cast_fp16)[name = string("transpose_29")]; tensor var_4209_cast_fp16 = mul(x = embed_59_cast_fp16, y = cos_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor var_4210_split_sizes_0 = const()[name = string("op_4210_split_sizes_0"), val = tensor([32, 32])]; int32 var_4210_axis_0 = const()[name = string("op_4210_axis_0"), val = int32(-1)]; tensor var_4210_cast_fp16_0, tensor var_4210_cast_fp16_1 = split(axis = var_4210_axis_0, split_sizes = var_4210_split_sizes_0, x = embed_59_cast_fp16)[name = string("op_4210_cast_fp16")]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4212_cast_fp16 = mul(x = var_4210_cast_fp16_1, y = const_148_promoted_to_fp16)[name = string("op_4212_cast_fp16")]; int32 var_4214 = const()[name = string("op_4214"), val = int32(-1)]; bool var_4215_interleave_0 = const()[name = string("op_4215_interleave_0"), val = bool(false)]; tensor var_4215_cast_fp16 = concat(axis = var_4214, interleave = var_4215_interleave_0, values = (var_4212_cast_fp16, var_4210_cast_fp16_0))[name = string("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = mul(x = var_4215_cast_fp16, y = sin_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor key_states_59_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4216_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([14])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([0])]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([15])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_141, expand_dims_142, position_id, concat_115_values3_0))[name = string("concat_115")]; tensor concat_116_values1_0 = const()[name = string("concat_116_values1_0"), val = tensor([0])]; tensor concat_116_values3_0 = const()[name = string("concat_116_values3_0"), val = tensor([0])]; int32 concat_116_axis_0 = const()[name = string("concat_116_axis_0"), val = int32(0)]; bool concat_116_interleave_0 = const()[name = string("concat_116_interleave_0"), val = bool(false)]; tensor concat_116 = concat(axis = concat_116_axis_0, interleave = concat_116_interleave_0, values = (expand_dims_144, concat_116_values1_0, var_426, concat_116_values3_0))[name = string("concat_116")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = key_states_59_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_146_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_146")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59_cast_fp16 = transpose(perm = value_states_59_perm_0, x = var_4195_cast_fp16)[name = string("transpose_28")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = value_states_59_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_147_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_147")]; tensor var_4259_begin_0 = const()[name = string("op_4259_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4259_end_0 = const()[name = string("op_4259_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4259_end_mask_0 = const()[name = string("op_4259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4259_cast_fp16 = slice_by_index(begin = var_4259_begin_0, end = var_4259_end_0, end_mask = var_4259_end_mask_0, x = coreml_update_state_76)[name = string("op_4259_cast_fp16")]; tensor tile_28 = const()[name = string("tile_28"), val = tensor([1, 1])]; int32 var_4262_axis_0 = const()[name = string("op_4262_axis_0"), val = int32(1)]; tensor var_4262_cast_fp16_0, tensor var_4262_cast_fp16_1 = split(axis = var_4262_axis_0, split_sizes = tile_28, x = var_4259_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor var_4269_begin_0 = const()[name = string("op_4269_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = string("op_4269_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4269_end_mask_0 = const()[name = string("op_4269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = coreml_update_state_77)[name = string("op_4269_cast_fp16")]; tensor tile_29 = const()[name = string("tile_29"), val = tensor([1, 1])]; int32 var_4272_axis_0 = const()[name = string("op_4272_axis_0"), val = int32(1)]; tensor var_4272_cast_fp16_0, tensor var_4272_cast_fp16_1 = split(axis = var_4272_axis_0, split_sizes = tile_29, x = var_4269_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4275_split_sizes_0 = const()[name = string("op_4275_split_sizes_0"), val = tensor([8, 8])]; int32 var_4275_axis_0 = const()[name = string("op_4275_axis_0"), val = int32(1)]; tensor var_4275_cast_fp16_0, tensor var_4275_cast_fp16_1 = split(axis = var_4275_axis_0, split_sizes = var_4275_split_sizes_0, x = query_states_59_cast_fp16)[name = string("op_4275_cast_fp16")]; bool attn_weights_225_transpose_x_0 = const()[name = string("attn_weights_225_transpose_x_0"), val = bool(false)]; bool attn_weights_225_transpose_y_0 = const()[name = string("attn_weights_225_transpose_y_0"), val = bool(false)]; tensor attn_weights_225_cast_fp16 = matmul(transpose_x = attn_weights_225_transpose_x_0, transpose_y = attn_weights_225_transpose_y_0, x = var_4262_cast_fp16_0, y = var_4275_cast_fp16_0)[name = string("attn_weights_225_cast_fp16")]; fp16 _inversed_attn_weights_227_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_227_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_227_cast_fp16 = mul(x = attn_weights_225_cast_fp16, y = _inversed_attn_weights_227_y_0_to_fp16)[name = string("_inversed_attn_weights_227_cast_fp16")]; tensor attn_weights_229_cast_fp16 = add(x = _inversed_attn_weights_227_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_229_cast_fp16")]; int32 var_4282 = const()[name = string("op_4282"), val = int32(2)]; tensor attn_weights_231_cast_fp16 = softmax(axis = var_4282, x = attn_weights_229_cast_fp16)[name = string("attn_weights_231_cast_fp16")]; bool var_4288_transpose_x_1 = const()[name = string("op_4288_transpose_x_1"), val = bool(true)]; bool var_4288_transpose_y_1 = const()[name = string("op_4288_transpose_y_1"), val = bool(false)]; tensor var_4288_cast_fp16 = matmul(transpose_x = var_4288_transpose_x_1, transpose_y = var_4288_transpose_y_1, x = attn_weights_231_cast_fp16, y = var_4272_cast_fp16_0)[name = string("op_4288_cast_fp16")]; bool attn_weights_233_transpose_x_0 = const()[name = string("attn_weights_233_transpose_x_0"), val = bool(false)]; bool attn_weights_233_transpose_y_0 = const()[name = string("attn_weights_233_transpose_y_0"), val = bool(false)]; tensor attn_weights_233_cast_fp16 = matmul(transpose_x = attn_weights_233_transpose_x_0, transpose_y = attn_weights_233_transpose_y_0, x = var_4262_cast_fp16_1, y = var_4275_cast_fp16_1)[name = string("attn_weights_233_cast_fp16")]; fp16 _inversed_attn_weights_235_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_235_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_235_cast_fp16 = mul(x = attn_weights_233_cast_fp16, y = _inversed_attn_weights_235_y_0_to_fp16)[name = string("_inversed_attn_weights_235_cast_fp16")]; tensor attn_weights_237_cast_fp16 = add(x = _inversed_attn_weights_235_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_237_cast_fp16")]; int32 var_4294 = const()[name = string("op_4294"), val = int32(2)]; tensor attn_weights_239_cast_fp16 = softmax(axis = var_4294, x = attn_weights_237_cast_fp16)[name = string("attn_weights_239_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(true)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_239_cast_fp16, y = var_4272_cast_fp16_1)[name = string("attn_output_85_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(1)]; bool attn_output_87_interleave_0 = const()[name = string("attn_output_87_interleave_0"), val = bool(false)]; tensor attn_output_87_cast_fp16 = concat(axis = var_4302, interleave = attn_output_87_interleave_0, values = (var_4288_cast_fp16, attn_output_85_cast_fp16))[name = string("attn_output_87_cast_fp16")]; tensor var_4306_perm_0 = const()[name = string("op_4306_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4311 = const()[name = string("op_4311"), val = tensor([1, 1024, 1, 128])]; tensor var_4306_cast_fp16 = transpose(perm = var_4306_perm_0, x = attn_output_87_cast_fp16)[name = string("transpose_27")]; tensor x_257_cast_fp16 = reshape(shape = var_4311, x = var_4306_cast_fp16)[name = string("x_257_cast_fp16")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429531584)))]; tensor hidden_states_87_cast_fp16 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = var_4318_to_fp16, x = x_257_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_251_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(1)]; fp16 const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4333_cast_fp16 = mul(x = x_259_cast_fp16, y = const_153_promoted_to_fp16)[name = string("op_4333_cast_fp16")]; bool x_261_interleave_0 = const()[name = string("x_261_interleave_0"), val = bool(false)]; tensor x_261_cast_fp16 = concat(axis = var_4330, interleave = x_261_interleave_0, values = (x_259_cast_fp16, var_4333_cast_fp16))[name = string("x_261_cast_fp16")]; tensor out_175_axes_0 = const()[name = string("out_175_axes_0"), val = tensor([1])]; fp16 var_4343_to_fp16 = const()[name = string("op_4343_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_175_cast_fp16 = layer_norm(axes = out_175_axes_0, epsilon = var_4343_to_fp16, x = x_261_cast_fp16)[name = string("out_175_cast_fp16")]; tensor layer_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431628800)))]; tensor out_177_cast_fp16 = mul(x = out_175_cast_fp16, y = layer_layers_14_post_attention_layernorm_weight_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_4349_split_sizes_0 = const()[name = string("op_4349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4349_axis_0 = const()[name = string("op_4349_axis_0"), val = int32(1)]; tensor var_4349_cast_fp16_0, tensor var_4349_cast_fp16_1 = split(axis = var_4349_axis_0, split_sizes = var_4349_split_sizes_0, x = out_177_cast_fp16)[name = string("op_4349_cast_fp16")]; string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431632960)))]; tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = var_4354_to_fp16, x = var_4349_cast_fp16_0)[name = string("input_29_cast_fp16")]; tensor var_4365_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_4365_cast_fp16")]; string var_4370_pad_type_0 = const()[name = string("op_4370_pad_type_0"), val = string("valid")]; tensor var_4370_strides_0 = const()[name = string("op_4370_strides_0"), val = tensor([1, 1])]; tensor var_4370_pad_0 = const()[name = string("op_4370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4370_dilations_0 = const()[name = string("op_4370_dilations_0"), val = tensor([1, 1])]; int32 var_4370_groups_0 = const()[name = string("op_4370_groups_0"), val = int32(1)]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440021632)))]; tensor var_4370_cast_fp16 = conv(dilations = var_4370_dilations_0, groups = var_4370_groups_0, pad = var_4370_pad_0, pad_type = var_4370_pad_type_0, strides = var_4370_strides_0, weight = var_4353_to_fp16, x = var_4349_cast_fp16_0)[name = string("op_4370_cast_fp16")]; tensor x_267_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4370_cast_fp16)[name = string("x_267_cast_fp16")]; string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; tensor var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448410304)))]; tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = var_4352_to_fp16, x = x_267_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor x_269_cast_fp16 = add(x = x_259_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("x_269_cast_fp16")]; int32 var_4383 = const()[name = string("op_4383"), val = int32(1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool x_271_interleave_0 = const()[name = string("x_271_interleave_0"), val = bool(false)]; tensor x_271_cast_fp16 = concat(axis = var_4383, interleave = x_271_interleave_0, values = (x_269_cast_fp16, var_4386_cast_fp16))[name = string("x_271_cast_fp16")]; tensor out_181_axes_0 = const()[name = string("out_181_axes_0"), val = tensor([1])]; fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_181_cast_fp16 = layer_norm(axes = out_181_axes_0, epsilon = var_4396_to_fp16, x = x_271_cast_fp16)[name = string("out_181_cast_fp16")]; tensor layer_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456798976)))]; tensor out_183_cast_fp16 = mul(x = out_181_cast_fp16, y = layer_layers_15_input_layernorm_weight_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_4402_split_sizes_0 = const()[name = string("op_4402_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4402_axis_0 = const()[name = string("op_4402_axis_0"), val = int32(1)]; tensor var_4402_cast_fp16_0, tensor var_4402_cast_fp16_1 = split(axis = var_4402_axis_0, split_sizes = var_4402_split_sizes_0, x = out_183_cast_fp16)[name = string("op_4402_cast_fp16")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456803136)))]; tensor query_states_61_cast_fp16 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = var_4424_to_fp16, x = var_4402_cast_fp16_0)[name = string("query_states_61_cast_fp16")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458900352)))]; tensor key_states_61_cast_fp16 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = var_4435_to_fp16, x = var_4402_cast_fp16_0)[name = string("key_states_61_cast_fp16")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459162560)))]; tensor value_states_61_cast_fp16 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = var_4446_to_fp16, x = var_4402_cast_fp16_0)[name = string("value_states_61_cast_fp16")]; tensor var_4454 = const()[name = string("op_4454"), val = tensor([1, 16, 64, 128])]; tensor embed_61_cast_fp16 = reshape(shape = var_4454, x = query_states_61_cast_fp16)[name = string("embed_61_cast_fp16")]; tensor var_4458 = const()[name = string("op_4458"), val = tensor([1, 2, 64, 128])]; tensor var_4459_cast_fp16 = reshape(shape = var_4458, x = key_states_61_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor embed_63_perm_0 = const()[name = string("embed_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4465 = const()[name = string("op_4465"), val = tensor([1, 2, 64, 128])]; tensor var_4466_cast_fp16 = reshape(shape = var_4465, x = value_states_61_cast_fp16)[name = string("op_4466_cast_fp16")]; tensor value_states_63_perm_0 = const()[name = string("value_states_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4470_cast_fp16 = mul(x = embed_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4471_split_sizes_0 = const()[name = string("op_4471_split_sizes_0"), val = tensor([32, 32])]; int32 var_4471_axis_0 = const()[name = string("op_4471_axis_0"), val = int32(-2)]; tensor var_4471_cast_fp16_0, tensor var_4471_cast_fp16_1 = split(axis = var_4471_axis_0, split_sizes = var_4471_split_sizes_0, x = embed_61_cast_fp16)[name = string("op_4471_cast_fp16")]; fp16 const_157_promoted_to_fp16 = const()[name = string("const_157_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4473_cast_fp16 = mul(x = var_4471_cast_fp16_1, y = const_157_promoted_to_fp16)[name = string("op_4473_cast_fp16")]; int32 var_4475 = const()[name = string("op_4475"), val = int32(-2)]; bool var_4476_interleave_0 = const()[name = string("op_4476_interleave_0"), val = bool(false)]; tensor var_4476_cast_fp16 = concat(axis = var_4475, interleave = var_4476_interleave_0, values = (var_4473_cast_fp16, var_4471_cast_fp16_0))[name = string("op_4476_cast_fp16")]; tensor var_4477_cast_fp16 = mul(x = var_4476_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4477_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_4470_cast_fp16, y = var_4477_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor embed_63_cast_fp16 = transpose(perm = embed_63_perm_0, x = var_4459_cast_fp16)[name = string("transpose_26")]; tensor var_4480_cast_fp16 = mul(x = embed_63_cast_fp16, y = cos_cast_fp16)[name = string("op_4480_cast_fp16")]; tensor var_4481_split_sizes_0 = const()[name = string("op_4481_split_sizes_0"), val = tensor([32, 32])]; int32 var_4481_axis_0 = const()[name = string("op_4481_axis_0"), val = int32(-1)]; tensor var_4481_cast_fp16_0, tensor var_4481_cast_fp16_1 = split(axis = var_4481_axis_0, split_sizes = var_4481_split_sizes_0, x = embed_63_cast_fp16)[name = string("op_4481_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4483_cast_fp16 = mul(x = var_4481_cast_fp16_1, y = const_158_promoted_to_fp16)[name = string("op_4483_cast_fp16")]; int32 var_4485 = const()[name = string("op_4485"), val = int32(-1)]; bool var_4486_interleave_0 = const()[name = string("op_4486_interleave_0"), val = bool(false)]; tensor var_4486_cast_fp16 = concat(axis = var_4485, interleave = var_4486_interleave_0, values = (var_4483_cast_fp16, var_4481_cast_fp16_0))[name = string("op_4486_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = var_4486_cast_fp16, y = sin_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor key_states_63_cast_fp16 = add(x = var_4480_cast_fp16, y = var_4487_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([15])]; tensor expand_dims_152 = const()[name = string("expand_dims_152"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([16])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_151, expand_dims_152, position_id, concat_123_values3_0))[name = string("concat_123")]; tensor concat_124_values1_0 = const()[name = string("concat_124_values1_0"), val = tensor([0])]; tensor concat_124_values3_0 = const()[name = string("concat_124_values3_0"), val = tensor([0])]; int32 concat_124_axis_0 = const()[name = string("concat_124_axis_0"), val = int32(0)]; bool concat_124_interleave_0 = const()[name = string("concat_124_interleave_0"), val = bool(false)]; tensor concat_124 = concat(axis = concat_124_axis_0, interleave = concat_124_interleave_0, values = (expand_dims_154, concat_124_values1_0, var_426, concat_124_values3_0))[name = string("concat_124")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_148_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_148")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63_cast_fp16 = transpose(perm = value_states_63_perm_0, x = var_4466_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = value_states_63_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_149_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_149")]; tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_78)[name = string("op_4530_cast_fp16")]; tensor tile_30 = const()[name = string("tile_30"), val = tensor([1, 1])]; int32 var_4533_axis_0 = const()[name = string("op_4533_axis_0"), val = int32(1)]; tensor var_4533_cast_fp16_0, tensor var_4533_cast_fp16_1 = split(axis = var_4533_axis_0, split_sizes = tile_30, x = var_4530_cast_fp16)[name = string("op_4533_cast_fp16")]; tensor var_4540_begin_0 = const()[name = string("op_4540_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4540_end_0 = const()[name = string("op_4540_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4540_end_mask_0 = const()[name = string("op_4540_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4540_cast_fp16 = slice_by_index(begin = var_4540_begin_0, end = var_4540_end_0, end_mask = var_4540_end_mask_0, x = coreml_update_state_79)[name = string("op_4540_cast_fp16")]; tensor tile_31 = const()[name = string("tile_31"), val = tensor([1, 1])]; int32 var_4543_axis_0 = const()[name = string("op_4543_axis_0"), val = int32(1)]; tensor var_4543_cast_fp16_0, tensor var_4543_cast_fp16_1 = split(axis = var_4543_axis_0, split_sizes = tile_31, x = var_4540_cast_fp16)[name = string("op_4543_cast_fp16")]; tensor var_4546_split_sizes_0 = const()[name = string("op_4546_split_sizes_0"), val = tensor([8, 8])]; int32 var_4546_axis_0 = const()[name = string("op_4546_axis_0"), val = int32(1)]; tensor var_4546_cast_fp16_0, tensor var_4546_cast_fp16_1 = split(axis = var_4546_axis_0, split_sizes = var_4546_split_sizes_0, x = query_states_63_cast_fp16)[name = string("op_4546_cast_fp16")]; bool attn_weights_241_transpose_x_0 = const()[name = string("attn_weights_241_transpose_x_0"), val = bool(false)]; bool attn_weights_241_transpose_y_0 = const()[name = string("attn_weights_241_transpose_y_0"), val = bool(false)]; tensor attn_weights_241_cast_fp16 = matmul(transpose_x = attn_weights_241_transpose_x_0, transpose_y = attn_weights_241_transpose_y_0, x = var_4533_cast_fp16_0, y = var_4546_cast_fp16_0)[name = string("attn_weights_241_cast_fp16")]; fp16 _inversed_attn_weights_243_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_243_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_243_cast_fp16 = mul(x = attn_weights_241_cast_fp16, y = _inversed_attn_weights_243_y_0_to_fp16)[name = string("_inversed_attn_weights_243_cast_fp16")]; tensor attn_weights_245_cast_fp16 = add(x = _inversed_attn_weights_243_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_245_cast_fp16")]; int32 var_4553 = const()[name = string("op_4553"), val = int32(2)]; tensor attn_weights_247_cast_fp16 = softmax(axis = var_4553, x = attn_weights_245_cast_fp16)[name = string("attn_weights_247_cast_fp16")]; bool var_4559_transpose_x_1 = const()[name = string("op_4559_transpose_x_1"), val = bool(true)]; bool var_4559_transpose_y_1 = const()[name = string("op_4559_transpose_y_1"), val = bool(false)]; tensor var_4559_cast_fp16 = matmul(transpose_x = var_4559_transpose_x_1, transpose_y = var_4559_transpose_y_1, x = attn_weights_247_cast_fp16, y = var_4543_cast_fp16_0)[name = string("op_4559_cast_fp16")]; bool attn_weights_249_transpose_x_0 = const()[name = string("attn_weights_249_transpose_x_0"), val = bool(false)]; bool attn_weights_249_transpose_y_0 = const()[name = string("attn_weights_249_transpose_y_0"), val = bool(false)]; tensor attn_weights_249_cast_fp16 = matmul(transpose_x = attn_weights_249_transpose_x_0, transpose_y = attn_weights_249_transpose_y_0, x = var_4533_cast_fp16_1, y = var_4546_cast_fp16_1)[name = string("attn_weights_249_cast_fp16")]; fp16 _inversed_attn_weights_251_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_251_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_251_cast_fp16 = mul(x = attn_weights_249_cast_fp16, y = _inversed_attn_weights_251_y_0_to_fp16)[name = string("_inversed_attn_weights_251_cast_fp16")]; tensor attn_weights_253_cast_fp16 = add(x = _inversed_attn_weights_251_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_253_cast_fp16")]; int32 var_4565 = const()[name = string("op_4565"), val = int32(2)]; tensor attn_weights_255_cast_fp16 = softmax(axis = var_4565, x = attn_weights_253_cast_fp16)[name = string("attn_weights_255_cast_fp16")]; bool attn_output_91_transpose_x_1 = const()[name = string("attn_output_91_transpose_x_1"), val = bool(true)]; bool attn_output_91_transpose_y_1 = const()[name = string("attn_output_91_transpose_y_1"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_1, transpose_y = attn_output_91_transpose_y_1, x = attn_weights_255_cast_fp16, y = var_4543_cast_fp16_1)[name = string("attn_output_91_cast_fp16")]; int32 var_4573 = const()[name = string("op_4573"), val = int32(1)]; bool attn_output_93_interleave_0 = const()[name = string("attn_output_93_interleave_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = concat(axis = var_4573, interleave = attn_output_93_interleave_0, values = (var_4559_cast_fp16, attn_output_91_cast_fp16))[name = string("attn_output_93_cast_fp16")]; tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 1024, 1, 128])]; tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; tensor x_275_cast_fp16 = reshape(shape = var_4582, x = var_4577_cast_fp16)[name = string("x_275_cast_fp16")]; string hidden_states_93_pad_type_0 = const()[name = string("hidden_states_93_pad_type_0"), val = string("valid")]; tensor hidden_states_93_strides_0 = const()[name = string("hidden_states_93_strides_0"), val = tensor([1, 1])]; tensor hidden_states_93_pad_0 = const()[name = string("hidden_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_93_dilations_0 = const()[name = string("hidden_states_93_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_93_groups_0 = const()[name = string("hidden_states_93_groups_0"), val = int32(1)]; tensor var_4589_to_fp16 = const()[name = string("op_4589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459424768)))]; tensor hidden_states_93_cast_fp16 = conv(dilations = hidden_states_93_dilations_0, groups = hidden_states_93_groups_0, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = hidden_states_93_strides_0, weight = var_4589_to_fp16, x = x_275_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_269_cast_fp16, y = hidden_states_93_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_4601 = const()[name = string("op_4601"), val = int32(1)]; fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4604_cast_fp16 = mul(x = x_277_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_4604_cast_fp16")]; bool x_279_interleave_0 = const()[name = string("x_279_interleave_0"), val = bool(false)]; tensor x_279_cast_fp16 = concat(axis = var_4601, interleave = x_279_interleave_0, values = (x_277_cast_fp16, var_4604_cast_fp16))[name = string("x_279_cast_fp16")]; tensor out_187_axes_0 = const()[name = string("out_187_axes_0"), val = tensor([1])]; fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_187_cast_fp16 = layer_norm(axes = out_187_axes_0, epsilon = var_4614_to_fp16, x = x_279_cast_fp16)[name = string("out_187_cast_fp16")]; tensor layer_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461521984)))]; tensor out_189_cast_fp16 = mul(x = out_187_cast_fp16, y = layer_layers_15_post_attention_layernorm_weight_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_4620_split_sizes_0 = const()[name = string("op_4620_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4620_axis_0 = const()[name = string("op_4620_axis_0"), val = int32(1)]; tensor var_4620_cast_fp16_0, tensor var_4620_cast_fp16_1 = split(axis = var_4620_axis_0, split_sizes = var_4620_split_sizes_0, x = out_189_cast_fp16)[name = string("op_4620_cast_fp16")]; string input_31_pad_type_0 = const()[name = string("input_31_pad_type_0"), val = string("valid")]; tensor input_31_strides_0 = const()[name = string("input_31_strides_0"), val = tensor([1, 1])]; tensor input_31_pad_0 = const()[name = string("input_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_31_dilations_0 = const()[name = string("input_31_dilations_0"), val = tensor([1, 1])]; int32 input_31_groups_0 = const()[name = string("input_31_groups_0"), val = int32(1)]; tensor var_4625_to_fp16 = const()[name = string("op_4625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461526144)))]; tensor input_31_cast_fp16 = conv(dilations = input_31_dilations_0, groups = input_31_groups_0, pad = input_31_pad_0, pad_type = input_31_pad_type_0, strides = input_31_strides_0, weight = var_4625_to_fp16, x = var_4620_cast_fp16_0)[name = string("input_31_cast_fp16")]; tensor var_4636_cast_fp16 = silu(x = input_31_cast_fp16)[name = string("op_4636_cast_fp16")]; string var_4641_pad_type_0 = const()[name = string("op_4641_pad_type_0"), val = string("valid")]; tensor var_4641_strides_0 = const()[name = string("op_4641_strides_0"), val = tensor([1, 1])]; tensor var_4641_pad_0 = const()[name = string("op_4641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_dilations_0 = const()[name = string("op_4641_dilations_0"), val = tensor([1, 1])]; int32 var_4641_groups_0 = const()[name = string("op_4641_groups_0"), val = int32(1)]; tensor var_4624_to_fp16 = const()[name = string("op_4624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469914816)))]; tensor var_4641_cast_fp16 = conv(dilations = var_4641_dilations_0, groups = var_4641_groups_0, pad = var_4641_pad_0, pad_type = var_4641_pad_type_0, strides = var_4641_strides_0, weight = var_4624_to_fp16, x = var_4620_cast_fp16_0)[name = string("op_4641_cast_fp16")]; tensor x_285_cast_fp16 = mul(x = var_4636_cast_fp16, y = var_4641_cast_fp16)[name = string("x_285_cast_fp16")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478303488)))]; tensor hidden_states_95_cast_fp16 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = var_4623_to_fp16, x = x_285_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor x_287_cast_fp16 = add(x = x_277_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("x_287_cast_fp16")]; int32 var_4654 = const()[name = string("op_4654"), val = int32(1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4657_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4657_cast_fp16")]; bool x_289_interleave_0 = const()[name = string("x_289_interleave_0"), val = bool(false)]; tensor x_289_cast_fp16 = concat(axis = var_4654, interleave = x_289_interleave_0, values = (x_287_cast_fp16, var_4657_cast_fp16))[name = string("x_289_cast_fp16")]; tensor out_193_axes_0 = const()[name = string("out_193_axes_0"), val = tensor([1])]; fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_193_cast_fp16 = layer_norm(axes = out_193_axes_0, epsilon = var_4667_to_fp16, x = x_289_cast_fp16)[name = string("out_193_cast_fp16")]; tensor layer_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486692160)))]; tensor out_195_cast_fp16 = mul(x = out_193_cast_fp16, y = layer_layers_16_input_layernorm_weight_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_4673_split_sizes_0 = const()[name = string("op_4673_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4673_axis_0 = const()[name = string("op_4673_axis_0"), val = int32(1)]; tensor var_4673_cast_fp16_0, tensor var_4673_cast_fp16_1 = split(axis = var_4673_axis_0, split_sizes = var_4673_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4673_cast_fp16")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486696320)))]; tensor query_states_65_cast_fp16 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = var_4695_to_fp16, x = var_4673_cast_fp16_0)[name = string("query_states_65_cast_fp16")]; string key_states_65_pad_type_0 = const()[name = string("key_states_65_pad_type_0"), val = string("valid")]; tensor key_states_65_strides_0 = const()[name = string("key_states_65_strides_0"), val = tensor([1, 1])]; tensor key_states_65_pad_0 = const()[name = string("key_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_65_dilations_0 = const()[name = string("key_states_65_dilations_0"), val = tensor([1, 1])]; int32 key_states_65_groups_0 = const()[name = string("key_states_65_groups_0"), val = int32(1)]; tensor var_4706_to_fp16 = const()[name = string("op_4706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488793536)))]; tensor key_states_65_cast_fp16 = conv(dilations = key_states_65_dilations_0, groups = key_states_65_groups_0, pad = key_states_65_pad_0, pad_type = key_states_65_pad_type_0, strides = key_states_65_strides_0, weight = var_4706_to_fp16, x = var_4673_cast_fp16_0)[name = string("key_states_65_cast_fp16")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489055744)))]; tensor value_states_65_cast_fp16 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = var_4717_to_fp16, x = var_4673_cast_fp16_0)[name = string("value_states_65_cast_fp16")]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 16, 64, 128])]; tensor embed_65_cast_fp16 = reshape(shape = var_4725, x = query_states_65_cast_fp16)[name = string("embed_65_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 2, 64, 128])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = key_states_65_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor embed_67_perm_0 = const()[name = string("embed_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2, 64, 128])]; tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = value_states_65_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor value_states_67_perm_0 = const()[name = string("value_states_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4741_cast_fp16 = mul(x = embed_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4741_cast_fp16")]; tensor var_4742_split_sizes_0 = const()[name = string("op_4742_split_sizes_0"), val = tensor([32, 32])]; int32 var_4742_axis_0 = const()[name = string("op_4742_axis_0"), val = int32(-2)]; tensor var_4742_cast_fp16_0, tensor var_4742_cast_fp16_1 = split(axis = var_4742_axis_0, split_sizes = var_4742_split_sizes_0, x = embed_65_cast_fp16)[name = string("op_4742_cast_fp16")]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4744_cast_fp16 = mul(x = var_4742_cast_fp16_1, y = const_167_promoted_to_fp16)[name = string("op_4744_cast_fp16")]; int32 var_4746 = const()[name = string("op_4746"), val = int32(-2)]; bool var_4747_interleave_0 = const()[name = string("op_4747_interleave_0"), val = bool(false)]; tensor var_4747_cast_fp16 = concat(axis = var_4746, interleave = var_4747_interleave_0, values = (var_4744_cast_fp16, var_4742_cast_fp16_0))[name = string("op_4747_cast_fp16")]; tensor var_4748_cast_fp16 = mul(x = var_4747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_4741_cast_fp16, y = var_4748_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor embed_67_cast_fp16 = transpose(perm = embed_67_perm_0, x = var_4730_cast_fp16)[name = string("transpose_23")]; tensor var_4751_cast_fp16 = mul(x = embed_67_cast_fp16, y = cos_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4752_split_sizes_0 = const()[name = string("op_4752_split_sizes_0"), val = tensor([32, 32])]; int32 var_4752_axis_0 = const()[name = string("op_4752_axis_0"), val = int32(-1)]; tensor var_4752_cast_fp16_0, tensor var_4752_cast_fp16_1 = split(axis = var_4752_axis_0, split_sizes = var_4752_split_sizes_0, x = embed_67_cast_fp16)[name = string("op_4752_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4754_cast_fp16 = mul(x = var_4752_cast_fp16_1, y = const_168_promoted_to_fp16)[name = string("op_4754_cast_fp16")]; int32 var_4756 = const()[name = string("op_4756"), val = int32(-1)]; bool var_4757_interleave_0 = const()[name = string("op_4757_interleave_0"), val = bool(false)]; tensor var_4757_cast_fp16 = concat(axis = var_4756, interleave = var_4757_interleave_0, values = (var_4754_cast_fp16, var_4752_cast_fp16_0))[name = string("op_4757_cast_fp16")]; tensor var_4758_cast_fp16 = mul(x = var_4757_cast_fp16, y = sin_cast_fp16)[name = string("op_4758_cast_fp16")]; tensor key_states_67_cast_fp16 = add(x = var_4751_cast_fp16, y = var_4758_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor expand_dims_161 = const()[name = string("expand_dims_161"), val = tensor([16])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_164 = const()[name = string("expand_dims_164"), val = tensor([17])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_161, expand_dims_162, position_id, concat_131_values3_0))[name = string("concat_131")]; tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_164, concat_132_values1_0, var_426, concat_132_values3_0))[name = string("concat_132")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = key_states_67_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_150_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_150")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67_cast_fp16 = transpose(perm = value_states_67_perm_0, x = var_4737_cast_fp16)[name = string("transpose_22")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = value_states_67_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_151_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_151")]; tensor var_4801_begin_0 = const()[name = string("op_4801_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4801_end_0 = const()[name = string("op_4801_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4801_end_mask_0 = const()[name = string("op_4801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4801_cast_fp16 = slice_by_index(begin = var_4801_begin_0, end = var_4801_end_0, end_mask = var_4801_end_mask_0, x = coreml_update_state_80)[name = string("op_4801_cast_fp16")]; tensor tile_32 = const()[name = string("tile_32"), val = tensor([1, 1])]; int32 var_4804_axis_0 = const()[name = string("op_4804_axis_0"), val = int32(1)]; tensor var_4804_cast_fp16_0, tensor var_4804_cast_fp16_1 = split(axis = var_4804_axis_0, split_sizes = tile_32, x = var_4801_cast_fp16)[name = string("op_4804_cast_fp16")]; tensor var_4811_begin_0 = const()[name = string("op_4811_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4811_end_0 = const()[name = string("op_4811_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4811_end_mask_0 = const()[name = string("op_4811_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4811_cast_fp16 = slice_by_index(begin = var_4811_begin_0, end = var_4811_end_0, end_mask = var_4811_end_mask_0, x = coreml_update_state_81)[name = string("op_4811_cast_fp16")]; tensor tile_33 = const()[name = string("tile_33"), val = tensor([1, 1])]; int32 var_4814_axis_0 = const()[name = string("op_4814_axis_0"), val = int32(1)]; tensor var_4814_cast_fp16_0, tensor var_4814_cast_fp16_1 = split(axis = var_4814_axis_0, split_sizes = tile_33, x = var_4811_cast_fp16)[name = string("op_4814_cast_fp16")]; tensor var_4817_split_sizes_0 = const()[name = string("op_4817_split_sizes_0"), val = tensor([8, 8])]; int32 var_4817_axis_0 = const()[name = string("op_4817_axis_0"), val = int32(1)]; tensor var_4817_cast_fp16_0, tensor var_4817_cast_fp16_1 = split(axis = var_4817_axis_0, split_sizes = var_4817_split_sizes_0, x = query_states_67_cast_fp16)[name = string("op_4817_cast_fp16")]; bool attn_weights_257_transpose_x_0 = const()[name = string("attn_weights_257_transpose_x_0"), val = bool(false)]; bool attn_weights_257_transpose_y_0 = const()[name = string("attn_weights_257_transpose_y_0"), val = bool(false)]; tensor attn_weights_257_cast_fp16 = matmul(transpose_x = attn_weights_257_transpose_x_0, transpose_y = attn_weights_257_transpose_y_0, x = var_4804_cast_fp16_0, y = var_4817_cast_fp16_0)[name = string("attn_weights_257_cast_fp16")]; fp16 _inversed_attn_weights_259_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_259_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_259_cast_fp16 = mul(x = attn_weights_257_cast_fp16, y = _inversed_attn_weights_259_y_0_to_fp16)[name = string("_inversed_attn_weights_259_cast_fp16")]; tensor attn_weights_261_cast_fp16 = add(x = _inversed_attn_weights_259_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_261_cast_fp16")]; int32 var_4824 = const()[name = string("op_4824"), val = int32(2)]; tensor attn_weights_263_cast_fp16 = softmax(axis = var_4824, x = attn_weights_261_cast_fp16)[name = string("attn_weights_263_cast_fp16")]; bool var_4830_transpose_x_1 = const()[name = string("op_4830_transpose_x_1"), val = bool(true)]; bool var_4830_transpose_y_1 = const()[name = string("op_4830_transpose_y_1"), val = bool(false)]; tensor var_4830_cast_fp16 = matmul(transpose_x = var_4830_transpose_x_1, transpose_y = var_4830_transpose_y_1, x = attn_weights_263_cast_fp16, y = var_4814_cast_fp16_0)[name = string("op_4830_cast_fp16")]; bool attn_weights_265_transpose_x_0 = const()[name = string("attn_weights_265_transpose_x_0"), val = bool(false)]; bool attn_weights_265_transpose_y_0 = const()[name = string("attn_weights_265_transpose_y_0"), val = bool(false)]; tensor attn_weights_265_cast_fp16 = matmul(transpose_x = attn_weights_265_transpose_x_0, transpose_y = attn_weights_265_transpose_y_0, x = var_4804_cast_fp16_1, y = var_4817_cast_fp16_1)[name = string("attn_weights_265_cast_fp16")]; fp16 _inversed_attn_weights_267_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_267_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_267_cast_fp16 = mul(x = attn_weights_265_cast_fp16, y = _inversed_attn_weights_267_y_0_to_fp16)[name = string("_inversed_attn_weights_267_cast_fp16")]; tensor attn_weights_269_cast_fp16 = add(x = _inversed_attn_weights_267_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_269_cast_fp16")]; int32 var_4836 = const()[name = string("op_4836"), val = int32(2)]; tensor attn_weights_271_cast_fp16 = softmax(axis = var_4836, x = attn_weights_269_cast_fp16)[name = string("attn_weights_271_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(true)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_271_cast_fp16, y = var_4814_cast_fp16_1)[name = string("attn_output_97_cast_fp16")]; int32 var_4844 = const()[name = string("op_4844"), val = int32(1)]; bool attn_output_99_interleave_0 = const()[name = string("attn_output_99_interleave_0"), val = bool(false)]; tensor attn_output_99_cast_fp16 = concat(axis = var_4844, interleave = attn_output_99_interleave_0, values = (var_4830_cast_fp16, attn_output_97_cast_fp16))[name = string("attn_output_99_cast_fp16")]; tensor var_4848_perm_0 = const()[name = string("op_4848_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 1024, 1, 128])]; tensor var_4848_cast_fp16 = transpose(perm = var_4848_perm_0, x = attn_output_99_cast_fp16)[name = string("transpose_21")]; tensor x_293_cast_fp16 = reshape(shape = var_4853, x = var_4848_cast_fp16)[name = string("x_293_cast_fp16")]; string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489317952)))]; tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = var_4860_to_fp16, x = x_293_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor x_295_cast_fp16 = add(x = x_287_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("x_295_cast_fp16")]; int32 var_4872 = const()[name = string("op_4872"), val = int32(1)]; fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4875_cast_fp16 = mul(x = x_295_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; bool x_297_interleave_0 = const()[name = string("x_297_interleave_0"), val = bool(false)]; tensor x_297_cast_fp16 = concat(axis = var_4872, interleave = x_297_interleave_0, values = (x_295_cast_fp16, var_4875_cast_fp16))[name = string("x_297_cast_fp16")]; tensor out_199_axes_0 = const()[name = string("out_199_axes_0"), val = tensor([1])]; fp16 var_4885_to_fp16 = const()[name = string("op_4885_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_199_cast_fp16 = layer_norm(axes = out_199_axes_0, epsilon = var_4885_to_fp16, x = x_297_cast_fp16)[name = string("out_199_cast_fp16")]; tensor layer_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491415168)))]; tensor out_201_cast_fp16 = mul(x = out_199_cast_fp16, y = layer_layers_16_post_attention_layernorm_weight_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4891_split_sizes_0 = const()[name = string("op_4891_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(1)]; tensor var_4891_cast_fp16_0, tensor var_4891_cast_fp16_1 = split(axis = var_4891_axis_0, split_sizes = var_4891_split_sizes_0, x = out_201_cast_fp16)[name = string("op_4891_cast_fp16")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491419328)))]; tensor input_33_cast_fp16 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = var_4896_to_fp16, x = var_4891_cast_fp16_0)[name = string("input_33_cast_fp16")]; tensor var_4907_cast_fp16 = silu(x = input_33_cast_fp16)[name = string("op_4907_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4895_to_fp16 = const()[name = string("op_4895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499808000)))]; tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = var_4895_to_fp16, x = var_4891_cast_fp16_0)[name = string("op_4912_cast_fp16")]; tensor x_303_cast_fp16 = mul(x = var_4907_cast_fp16, y = var_4912_cast_fp16)[name = string("x_303_cast_fp16")]; string hidden_states_101_pad_type_0 = const()[name = string("hidden_states_101_pad_type_0"), val = string("valid")]; tensor hidden_states_101_strides_0 = const()[name = string("hidden_states_101_strides_0"), val = tensor([1, 1])]; tensor hidden_states_101_pad_0 = const()[name = string("hidden_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_101_dilations_0 = const()[name = string("hidden_states_101_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_101_groups_0 = const()[name = string("hidden_states_101_groups_0"), val = int32(1)]; tensor var_4894_to_fp16 = const()[name = string("op_4894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508196672)))]; tensor hidden_states_101_cast_fp16 = conv(dilations = hidden_states_101_dilations_0, groups = hidden_states_101_groups_0, pad = hidden_states_101_pad_0, pad_type = hidden_states_101_pad_type_0, strides = hidden_states_101_strides_0, weight = var_4894_to_fp16, x = x_303_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_295_cast_fp16, y = hidden_states_101_cast_fp16)[name = string("x_305_cast_fp16")]; int32 var_4925 = const()[name = string("op_4925"), val = int32(1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4928_cast_fp16 = mul(x = x_305_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_4928_cast_fp16")]; bool x_307_interleave_0 = const()[name = string("x_307_interleave_0"), val = bool(false)]; tensor x_307_cast_fp16 = concat(axis = var_4925, interleave = x_307_interleave_0, values = (x_305_cast_fp16, var_4928_cast_fp16))[name = string("x_307_cast_fp16")]; tensor out_205_axes_0 = const()[name = string("out_205_axes_0"), val = tensor([1])]; fp16 var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_205_cast_fp16 = layer_norm(axes = out_205_axes_0, epsilon = var_4938_to_fp16, x = x_307_cast_fp16)[name = string("out_205_cast_fp16")]; tensor layer_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516585344)))]; tensor out_207_cast_fp16 = mul(x = out_205_cast_fp16, y = layer_layers_17_input_layernorm_weight_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_4944_split_sizes_0 = const()[name = string("op_4944_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4944_axis_0 = const()[name = string("op_4944_axis_0"), val = int32(1)]; tensor var_4944_cast_fp16_0, tensor var_4944_cast_fp16_1 = split(axis = var_4944_axis_0, split_sizes = var_4944_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4944_cast_fp16")]; string query_states_69_pad_type_0 = const()[name = string("query_states_69_pad_type_0"), val = string("valid")]; tensor query_states_69_strides_0 = const()[name = string("query_states_69_strides_0"), val = tensor([1, 1])]; tensor query_states_69_pad_0 = const()[name = string("query_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_69_dilations_0 = const()[name = string("query_states_69_dilations_0"), val = tensor([1, 1])]; int32 query_states_69_groups_0 = const()[name = string("query_states_69_groups_0"), val = int32(1)]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516589504)))]; tensor query_states_69_cast_fp16 = conv(dilations = query_states_69_dilations_0, groups = query_states_69_groups_0, pad = query_states_69_pad_0, pad_type = query_states_69_pad_type_0, strides = query_states_69_strides_0, weight = var_4966_to_fp16, x = var_4944_cast_fp16_0)[name = string("query_states_69_cast_fp16")]; string key_states_69_pad_type_0 = const()[name = string("key_states_69_pad_type_0"), val = string("valid")]; tensor key_states_69_strides_0 = const()[name = string("key_states_69_strides_0"), val = tensor([1, 1])]; tensor key_states_69_pad_0 = const()[name = string("key_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_69_dilations_0 = const()[name = string("key_states_69_dilations_0"), val = tensor([1, 1])]; int32 key_states_69_groups_0 = const()[name = string("key_states_69_groups_0"), val = int32(1)]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518686720)))]; tensor key_states_69_cast_fp16 = conv(dilations = key_states_69_dilations_0, groups = key_states_69_groups_0, pad = key_states_69_pad_0, pad_type = key_states_69_pad_type_0, strides = key_states_69_strides_0, weight = var_4977_to_fp16, x = var_4944_cast_fp16_0)[name = string("key_states_69_cast_fp16")]; string value_states_69_pad_type_0 = const()[name = string("value_states_69_pad_type_0"), val = string("valid")]; tensor value_states_69_strides_0 = const()[name = string("value_states_69_strides_0"), val = tensor([1, 1])]; tensor value_states_69_pad_0 = const()[name = string("value_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_69_dilations_0 = const()[name = string("value_states_69_dilations_0"), val = tensor([1, 1])]; int32 value_states_69_groups_0 = const()[name = string("value_states_69_groups_0"), val = int32(1)]; tensor var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518948928)))]; tensor value_states_69_cast_fp16 = conv(dilations = value_states_69_dilations_0, groups = value_states_69_groups_0, pad = value_states_69_pad_0, pad_type = value_states_69_pad_type_0, strides = value_states_69_strides_0, weight = var_4988_to_fp16, x = var_4944_cast_fp16_0)[name = string("value_states_69_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 16, 64, 128])]; tensor embed_69_cast_fp16 = reshape(shape = var_4996, x = query_states_69_cast_fp16)[name = string("embed_69_cast_fp16")]; tensor var_5000 = const()[name = string("op_5000"), val = tensor([1, 2, 64, 128])]; tensor var_5001_cast_fp16 = reshape(shape = var_5000, x = key_states_69_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor embed_71_perm_0 = const()[name = string("embed_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 2, 64, 128])]; tensor var_5008_cast_fp16 = reshape(shape = var_5007, x = value_states_69_cast_fp16)[name = string("op_5008_cast_fp16")]; tensor value_states_71_perm_0 = const()[name = string("value_states_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5012_cast_fp16 = mul(x = embed_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor var_5013_split_sizes_0 = const()[name = string("op_5013_split_sizes_0"), val = tensor([32, 32])]; int32 var_5013_axis_0 = const()[name = string("op_5013_axis_0"), val = int32(-2)]; tensor var_5013_cast_fp16_0, tensor var_5013_cast_fp16_1 = split(axis = var_5013_axis_0, split_sizes = var_5013_split_sizes_0, x = embed_69_cast_fp16)[name = string("op_5013_cast_fp16")]; fp16 const_177_promoted_to_fp16 = const()[name = string("const_177_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5015_cast_fp16 = mul(x = var_5013_cast_fp16_1, y = const_177_promoted_to_fp16)[name = string("op_5015_cast_fp16")]; int32 var_5017 = const()[name = string("op_5017"), val = int32(-2)]; bool var_5018_interleave_0 = const()[name = string("op_5018_interleave_0"), val = bool(false)]; tensor var_5018_cast_fp16 = concat(axis = var_5017, interleave = var_5018_interleave_0, values = (var_5015_cast_fp16, var_5013_cast_fp16_0))[name = string("op_5018_cast_fp16")]; tensor var_5019_cast_fp16 = mul(x = var_5018_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_5012_cast_fp16, y = var_5019_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor embed_71_cast_fp16 = transpose(perm = embed_71_perm_0, x = var_5001_cast_fp16)[name = string("transpose_20")]; tensor var_5022_cast_fp16 = mul(x = embed_71_cast_fp16, y = cos_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([32, 32])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_cast_fp16_0, tensor var_5023_cast_fp16_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = embed_71_cast_fp16)[name = string("op_5023_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5025_cast_fp16 = mul(x = var_5023_cast_fp16_1, y = const_178_promoted_to_fp16)[name = string("op_5025_cast_fp16")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028_cast_fp16 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025_cast_fp16, var_5023_cast_fp16_0))[name = string("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = mul(x = var_5028_cast_fp16, y = sin_cast_fp16)[name = string("op_5029_cast_fp16")]; tensor key_states_71_cast_fp16 = add(x = var_5022_cast_fp16, y = var_5029_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([17])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([0])]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([18])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_171, expand_dims_172, position_id, concat_139_values3_0))[name = string("concat_139")]; tensor concat_140_values1_0 = const()[name = string("concat_140_values1_0"), val = tensor([0])]; tensor concat_140_values3_0 = const()[name = string("concat_140_values3_0"), val = tensor([0])]; int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (expand_dims_174, concat_140_values1_0, var_426, concat_140_values3_0))[name = string("concat_140")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = key_states_71_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_152_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_152")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_71_cast_fp16 = transpose(perm = value_states_71_perm_0, x = var_5008_cast_fp16)[name = string("transpose_19")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = value_states_71_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_153_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_153")]; tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_82)[name = string("op_5072_cast_fp16")]; tensor tile_34 = const()[name = string("tile_34"), val = tensor([1, 1])]; int32 var_5075_axis_0 = const()[name = string("op_5075_axis_0"), val = int32(1)]; tensor var_5075_cast_fp16_0, tensor var_5075_cast_fp16_1 = split(axis = var_5075_axis_0, split_sizes = tile_34, x = var_5072_cast_fp16)[name = string("op_5075_cast_fp16")]; tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = coreml_update_state_83)[name = string("op_5082_cast_fp16")]; tensor tile_35 = const()[name = string("tile_35"), val = tensor([1, 1])]; int32 var_5085_axis_0 = const()[name = string("op_5085_axis_0"), val = int32(1)]; tensor var_5085_cast_fp16_0, tensor var_5085_cast_fp16_1 = split(axis = var_5085_axis_0, split_sizes = tile_35, x = var_5082_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor var_5088_split_sizes_0 = const()[name = string("op_5088_split_sizes_0"), val = tensor([8, 8])]; int32 var_5088_axis_0 = const()[name = string("op_5088_axis_0"), val = int32(1)]; tensor var_5088_cast_fp16_0, tensor var_5088_cast_fp16_1 = split(axis = var_5088_axis_0, split_sizes = var_5088_split_sizes_0, x = query_states_71_cast_fp16)[name = string("op_5088_cast_fp16")]; bool attn_weights_273_transpose_x_0 = const()[name = string("attn_weights_273_transpose_x_0"), val = bool(false)]; bool attn_weights_273_transpose_y_0 = const()[name = string("attn_weights_273_transpose_y_0"), val = bool(false)]; tensor attn_weights_273_cast_fp16 = matmul(transpose_x = attn_weights_273_transpose_x_0, transpose_y = attn_weights_273_transpose_y_0, x = var_5075_cast_fp16_0, y = var_5088_cast_fp16_0)[name = string("attn_weights_273_cast_fp16")]; fp16 _inversed_attn_weights_275_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_275_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_275_cast_fp16 = mul(x = attn_weights_273_cast_fp16, y = _inversed_attn_weights_275_y_0_to_fp16)[name = string("_inversed_attn_weights_275_cast_fp16")]; tensor attn_weights_277_cast_fp16 = add(x = _inversed_attn_weights_275_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_277_cast_fp16")]; int32 var_5095 = const()[name = string("op_5095"), val = int32(2)]; tensor attn_weights_279_cast_fp16 = softmax(axis = var_5095, x = attn_weights_277_cast_fp16)[name = string("attn_weights_279_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(true)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(false)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = attn_weights_279_cast_fp16, y = var_5085_cast_fp16_0)[name = string("op_5101_cast_fp16")]; bool attn_weights_281_transpose_x_0 = const()[name = string("attn_weights_281_transpose_x_0"), val = bool(false)]; bool attn_weights_281_transpose_y_0 = const()[name = string("attn_weights_281_transpose_y_0"), val = bool(false)]; tensor attn_weights_281_cast_fp16 = matmul(transpose_x = attn_weights_281_transpose_x_0, transpose_y = attn_weights_281_transpose_y_0, x = var_5075_cast_fp16_1, y = var_5088_cast_fp16_1)[name = string("attn_weights_281_cast_fp16")]; fp16 _inversed_attn_weights_283_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_283_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_283_cast_fp16 = mul(x = attn_weights_281_cast_fp16, y = _inversed_attn_weights_283_y_0_to_fp16)[name = string("_inversed_attn_weights_283_cast_fp16")]; tensor attn_weights_285_cast_fp16 = add(x = _inversed_attn_weights_283_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_285_cast_fp16")]; int32 var_5107 = const()[name = string("op_5107"), val = int32(2)]; tensor attn_weights_287_cast_fp16 = softmax(axis = var_5107, x = attn_weights_285_cast_fp16)[name = string("attn_weights_287_cast_fp16")]; bool attn_output_103_transpose_x_1 = const()[name = string("attn_output_103_transpose_x_1"), val = bool(true)]; bool attn_output_103_transpose_y_1 = const()[name = string("attn_output_103_transpose_y_1"), val = bool(false)]; tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_1, transpose_y = attn_output_103_transpose_y_1, x = attn_weights_287_cast_fp16, y = var_5085_cast_fp16_1)[name = string("attn_output_103_cast_fp16")]; int32 var_5115 = const()[name = string("op_5115"), val = int32(1)]; bool attn_output_105_interleave_0 = const()[name = string("attn_output_105_interleave_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = concat(axis = var_5115, interleave = attn_output_105_interleave_0, values = (var_5101_cast_fp16, attn_output_103_cast_fp16))[name = string("attn_output_105_cast_fp16")]; tensor var_5119_perm_0 = const()[name = string("op_5119_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 1024, 1, 128])]; tensor var_5119_cast_fp16 = transpose(perm = var_5119_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_18")]; tensor x_311_cast_fp16 = reshape(shape = var_5124, x = var_5119_cast_fp16)[name = string("x_311_cast_fp16")]; string hidden_states_105_pad_type_0 = const()[name = string("hidden_states_105_pad_type_0"), val = string("valid")]; tensor hidden_states_105_strides_0 = const()[name = string("hidden_states_105_strides_0"), val = tensor([1, 1])]; tensor hidden_states_105_pad_0 = const()[name = string("hidden_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_105_dilations_0 = const()[name = string("hidden_states_105_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_105_groups_0 = const()[name = string("hidden_states_105_groups_0"), val = int32(1)]; tensor var_5131_to_fp16 = const()[name = string("op_5131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519211136)))]; tensor hidden_states_105_cast_fp16 = conv(dilations = hidden_states_105_dilations_0, groups = hidden_states_105_groups_0, pad = hidden_states_105_pad_0, pad_type = hidden_states_105_pad_type_0, strides = hidden_states_105_strides_0, weight = var_5131_to_fp16, x = x_311_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor x_313_cast_fp16 = add(x = x_305_cast_fp16, y = hidden_states_105_cast_fp16)[name = string("x_313_cast_fp16")]; int32 var_5143 = const()[name = string("op_5143"), val = int32(1)]; fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5146_cast_fp16 = mul(x = x_313_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_5146_cast_fp16")]; bool x_315_interleave_0 = const()[name = string("x_315_interleave_0"), val = bool(false)]; tensor x_315_cast_fp16 = concat(axis = var_5143, interleave = x_315_interleave_0, values = (x_313_cast_fp16, var_5146_cast_fp16))[name = string("x_315_cast_fp16")]; tensor out_211_axes_0 = const()[name = string("out_211_axes_0"), val = tensor([1])]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_211_cast_fp16 = layer_norm(axes = out_211_axes_0, epsilon = var_5156_to_fp16, x = x_315_cast_fp16)[name = string("out_211_cast_fp16")]; tensor layer_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521308352)))]; tensor out_213_cast_fp16 = mul(x = out_211_cast_fp16, y = layer_layers_17_post_attention_layernorm_weight_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_5162_split_sizes_0 = const()[name = string("op_5162_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5162_axis_0 = const()[name = string("op_5162_axis_0"), val = int32(1)]; tensor var_5162_cast_fp16_0, tensor var_5162_cast_fp16_1 = split(axis = var_5162_axis_0, split_sizes = var_5162_split_sizes_0, x = out_213_cast_fp16)[name = string("op_5162_cast_fp16")]; string input_35_pad_type_0 = const()[name = string("input_35_pad_type_0"), val = string("valid")]; tensor input_35_strides_0 = const()[name = string("input_35_strides_0"), val = tensor([1, 1])]; tensor input_35_pad_0 = const()[name = string("input_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_35_dilations_0 = const()[name = string("input_35_dilations_0"), val = tensor([1, 1])]; int32 input_35_groups_0 = const()[name = string("input_35_groups_0"), val = int32(1)]; tensor var_5167_to_fp16 = const()[name = string("op_5167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521312512)))]; tensor input_35_cast_fp16 = conv(dilations = input_35_dilations_0, groups = input_35_groups_0, pad = input_35_pad_0, pad_type = input_35_pad_type_0, strides = input_35_strides_0, weight = var_5167_to_fp16, x = var_5162_cast_fp16_0)[name = string("input_35_cast_fp16")]; tensor var_5178_cast_fp16 = silu(x = input_35_cast_fp16)[name = string("op_5178_cast_fp16")]; string var_5183_pad_type_0 = const()[name = string("op_5183_pad_type_0"), val = string("valid")]; tensor var_5183_strides_0 = const()[name = string("op_5183_strides_0"), val = tensor([1, 1])]; tensor var_5183_pad_0 = const()[name = string("op_5183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5183_dilations_0 = const()[name = string("op_5183_dilations_0"), val = tensor([1, 1])]; int32 var_5183_groups_0 = const()[name = string("op_5183_groups_0"), val = int32(1)]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529701184)))]; tensor var_5183_cast_fp16 = conv(dilations = var_5183_dilations_0, groups = var_5183_groups_0, pad = var_5183_pad_0, pad_type = var_5183_pad_type_0, strides = var_5183_strides_0, weight = var_5166_to_fp16, x = var_5162_cast_fp16_0)[name = string("op_5183_cast_fp16")]; tensor x_321_cast_fp16 = mul(x = var_5178_cast_fp16, y = var_5183_cast_fp16)[name = string("x_321_cast_fp16")]; string hidden_states_107_pad_type_0 = const()[name = string("hidden_states_107_pad_type_0"), val = string("valid")]; tensor hidden_states_107_strides_0 = const()[name = string("hidden_states_107_strides_0"), val = tensor([1, 1])]; tensor hidden_states_107_pad_0 = const()[name = string("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_107_dilations_0 = const()[name = string("hidden_states_107_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_107_groups_0 = const()[name = string("hidden_states_107_groups_0"), val = int32(1)]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538089856)))]; tensor hidden_states_107_cast_fp16 = conv(dilations = hidden_states_107_dilations_0, groups = hidden_states_107_groups_0, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = hidden_states_107_strides_0, weight = var_5165_to_fp16, x = x_321_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor x_323_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_107_cast_fp16)[name = string("x_323_cast_fp16")]; int32 var_5196 = const()[name = string("op_5196"), val = int32(1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5199_cast_fp16 = mul(x = x_323_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_5199_cast_fp16")]; bool x_325_interleave_0 = const()[name = string("x_325_interleave_0"), val = bool(false)]; tensor x_325_cast_fp16 = concat(axis = var_5196, interleave = x_325_interleave_0, values = (x_323_cast_fp16, var_5199_cast_fp16))[name = string("x_325_cast_fp16")]; tensor out_217_axes_0 = const()[name = string("out_217_axes_0"), val = tensor([1])]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_217_cast_fp16 = layer_norm(axes = out_217_axes_0, epsilon = var_5209_to_fp16, x = x_325_cast_fp16)[name = string("out_217_cast_fp16")]; tensor layer_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546478528)))]; tensor out_219_cast_fp16 = mul(x = out_217_cast_fp16, y = layer_layers_18_input_layernorm_weight_to_fp16)[name = string("out_219_cast_fp16")]; tensor var_5215_split_sizes_0 = const()[name = string("op_5215_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5215_axis_0 = const()[name = string("op_5215_axis_0"), val = int32(1)]; tensor var_5215_cast_fp16_0, tensor var_5215_cast_fp16_1 = split(axis = var_5215_axis_0, split_sizes = var_5215_split_sizes_0, x = out_219_cast_fp16)[name = string("op_5215_cast_fp16")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546482688)))]; tensor query_states_73_cast_fp16 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = var_5237_to_fp16, x = var_5215_cast_fp16_0)[name = string("query_states_73_cast_fp16")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548579904)))]; tensor key_states_73_cast_fp16 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = var_5248_to_fp16, x = var_5215_cast_fp16_0)[name = string("key_states_73_cast_fp16")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor var_5259_to_fp16 = const()[name = string("op_5259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548842112)))]; tensor value_states_73_cast_fp16 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = var_5259_to_fp16, x = var_5215_cast_fp16_0)[name = string("value_states_73_cast_fp16")]; tensor var_5267 = const()[name = string("op_5267"), val = tensor([1, 16, 64, 128])]; tensor embed_73_cast_fp16 = reshape(shape = var_5267, x = query_states_73_cast_fp16)[name = string("embed_73_cast_fp16")]; tensor var_5271 = const()[name = string("op_5271"), val = tensor([1, 2, 64, 128])]; tensor var_5272_cast_fp16 = reshape(shape = var_5271, x = key_states_73_cast_fp16)[name = string("op_5272_cast_fp16")]; tensor embed_75_perm_0 = const()[name = string("embed_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 64, 128])]; tensor var_5279_cast_fp16 = reshape(shape = var_5278, x = value_states_73_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor value_states_75_perm_0 = const()[name = string("value_states_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5283_cast_fp16 = mul(x = embed_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor var_5284_split_sizes_0 = const()[name = string("op_5284_split_sizes_0"), val = tensor([32, 32])]; int32 var_5284_axis_0 = const()[name = string("op_5284_axis_0"), val = int32(-2)]; tensor var_5284_cast_fp16_0, tensor var_5284_cast_fp16_1 = split(axis = var_5284_axis_0, split_sizes = var_5284_split_sizes_0, x = embed_73_cast_fp16)[name = string("op_5284_cast_fp16")]; fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5286_cast_fp16 = mul(x = var_5284_cast_fp16_1, y = const_187_promoted_to_fp16)[name = string("op_5286_cast_fp16")]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-2)]; bool var_5289_interleave_0 = const()[name = string("op_5289_interleave_0"), val = bool(false)]; tensor var_5289_cast_fp16 = concat(axis = var_5288, interleave = var_5289_interleave_0, values = (var_5286_cast_fp16, var_5284_cast_fp16_0))[name = string("op_5289_cast_fp16")]; tensor var_5290_cast_fp16 = mul(x = var_5289_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5290_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_5283_cast_fp16, y = var_5290_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor embed_75_cast_fp16 = transpose(perm = embed_75_perm_0, x = var_5272_cast_fp16)[name = string("transpose_17")]; tensor var_5293_cast_fp16 = mul(x = embed_75_cast_fp16, y = cos_cast_fp16)[name = string("op_5293_cast_fp16")]; tensor var_5294_split_sizes_0 = const()[name = string("op_5294_split_sizes_0"), val = tensor([32, 32])]; int32 var_5294_axis_0 = const()[name = string("op_5294_axis_0"), val = int32(-1)]; tensor var_5294_cast_fp16_0, tensor var_5294_cast_fp16_1 = split(axis = var_5294_axis_0, split_sizes = var_5294_split_sizes_0, x = embed_75_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5296_cast_fp16 = mul(x = var_5294_cast_fp16_1, y = const_188_promoted_to_fp16)[name = string("op_5296_cast_fp16")]; int32 var_5298 = const()[name = string("op_5298"), val = int32(-1)]; bool var_5299_interleave_0 = const()[name = string("op_5299_interleave_0"), val = bool(false)]; tensor var_5299_cast_fp16 = concat(axis = var_5298, interleave = var_5299_interleave_0, values = (var_5296_cast_fp16, var_5294_cast_fp16_0))[name = string("op_5299_cast_fp16")]; tensor var_5300_cast_fp16 = mul(x = var_5299_cast_fp16, y = sin_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor key_states_75_cast_fp16 = add(x = var_5293_cast_fp16, y = var_5300_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([18])]; tensor expand_dims_182 = const()[name = string("expand_dims_182"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([19])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_181, expand_dims_182, position_id, concat_147_values3_0))[name = string("concat_147")]; tensor concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = tensor([0])]; tensor concat_148_values3_0 = const()[name = string("concat_148_values3_0"), val = tensor([0])]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (expand_dims_184, concat_148_values1_0, var_426, concat_148_values3_0))[name = string("concat_148")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = key_states_75_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_154_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_154")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75_cast_fp16 = transpose(perm = value_states_75_perm_0, x = var_5279_cast_fp16)[name = string("transpose_16")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = value_states_75_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_155_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_155")]; tensor var_5343_begin_0 = const()[name = string("op_5343_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5343_end_0 = const()[name = string("op_5343_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5343_end_mask_0 = const()[name = string("op_5343_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5343_cast_fp16 = slice_by_index(begin = var_5343_begin_0, end = var_5343_end_0, end_mask = var_5343_end_mask_0, x = coreml_update_state_84)[name = string("op_5343_cast_fp16")]; tensor tile_36 = const()[name = string("tile_36"), val = tensor([1, 1])]; int32 var_5346_axis_0 = const()[name = string("op_5346_axis_0"), val = int32(1)]; tensor var_5346_cast_fp16_0, tensor var_5346_cast_fp16_1 = split(axis = var_5346_axis_0, split_sizes = tile_36, x = var_5343_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = coreml_update_state_85)[name = string("op_5353_cast_fp16")]; tensor tile_37 = const()[name = string("tile_37"), val = tensor([1, 1])]; int32 var_5356_axis_0 = const()[name = string("op_5356_axis_0"), val = int32(1)]; tensor var_5356_cast_fp16_0, tensor var_5356_cast_fp16_1 = split(axis = var_5356_axis_0, split_sizes = tile_37, x = var_5353_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_split_sizes_0 = const()[name = string("op_5359_split_sizes_0"), val = tensor([8, 8])]; int32 var_5359_axis_0 = const()[name = string("op_5359_axis_0"), val = int32(1)]; tensor var_5359_cast_fp16_0, tensor var_5359_cast_fp16_1 = split(axis = var_5359_axis_0, split_sizes = var_5359_split_sizes_0, x = query_states_75_cast_fp16)[name = string("op_5359_cast_fp16")]; bool attn_weights_289_transpose_x_0 = const()[name = string("attn_weights_289_transpose_x_0"), val = bool(false)]; bool attn_weights_289_transpose_y_0 = const()[name = string("attn_weights_289_transpose_y_0"), val = bool(false)]; tensor attn_weights_289_cast_fp16 = matmul(transpose_x = attn_weights_289_transpose_x_0, transpose_y = attn_weights_289_transpose_y_0, x = var_5346_cast_fp16_0, y = var_5359_cast_fp16_0)[name = string("attn_weights_289_cast_fp16")]; fp16 _inversed_attn_weights_291_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_291_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_291_cast_fp16 = mul(x = attn_weights_289_cast_fp16, y = _inversed_attn_weights_291_y_0_to_fp16)[name = string("_inversed_attn_weights_291_cast_fp16")]; tensor attn_weights_293_cast_fp16 = add(x = _inversed_attn_weights_291_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_293_cast_fp16")]; int32 var_5366 = const()[name = string("op_5366"), val = int32(2)]; tensor attn_weights_295_cast_fp16 = softmax(axis = var_5366, x = attn_weights_293_cast_fp16)[name = string("attn_weights_295_cast_fp16")]; bool var_5372_transpose_x_1 = const()[name = string("op_5372_transpose_x_1"), val = bool(true)]; bool var_5372_transpose_y_1 = const()[name = string("op_5372_transpose_y_1"), val = bool(false)]; tensor var_5372_cast_fp16 = matmul(transpose_x = var_5372_transpose_x_1, transpose_y = var_5372_transpose_y_1, x = attn_weights_295_cast_fp16, y = var_5356_cast_fp16_0)[name = string("op_5372_cast_fp16")]; bool attn_weights_297_transpose_x_0 = const()[name = string("attn_weights_297_transpose_x_0"), val = bool(false)]; bool attn_weights_297_transpose_y_0 = const()[name = string("attn_weights_297_transpose_y_0"), val = bool(false)]; tensor attn_weights_297_cast_fp16 = matmul(transpose_x = attn_weights_297_transpose_x_0, transpose_y = attn_weights_297_transpose_y_0, x = var_5346_cast_fp16_1, y = var_5359_cast_fp16_1)[name = string("attn_weights_297_cast_fp16")]; fp16 _inversed_attn_weights_299_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_299_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_299_cast_fp16 = mul(x = attn_weights_297_cast_fp16, y = _inversed_attn_weights_299_y_0_to_fp16)[name = string("_inversed_attn_weights_299_cast_fp16")]; tensor attn_weights_301_cast_fp16 = add(x = _inversed_attn_weights_299_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_301_cast_fp16")]; int32 var_5378 = const()[name = string("op_5378"), val = int32(2)]; tensor attn_weights_303_cast_fp16 = softmax(axis = var_5378, x = attn_weights_301_cast_fp16)[name = string("attn_weights_303_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(true)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_303_cast_fp16, y = var_5356_cast_fp16_1)[name = string("attn_output_109_cast_fp16")]; int32 var_5386 = const()[name = string("op_5386"), val = int32(1)]; bool attn_output_111_interleave_0 = const()[name = string("attn_output_111_interleave_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = concat(axis = var_5386, interleave = attn_output_111_interleave_0, values = (var_5372_cast_fp16, attn_output_109_cast_fp16))[name = string("attn_output_111_cast_fp16")]; tensor var_5390_perm_0 = const()[name = string("op_5390_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5395 = const()[name = string("op_5395"), val = tensor([1, 1024, 1, 128])]; tensor var_5390_cast_fp16 = transpose(perm = var_5390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_15")]; tensor x_329_cast_fp16 = reshape(shape = var_5395, x = var_5390_cast_fp16)[name = string("x_329_cast_fp16")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor var_5402_to_fp16 = const()[name = string("op_5402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549104320)))]; tensor hidden_states_111_cast_fp16 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = var_5402_to_fp16, x = x_329_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_323_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("x_331_cast_fp16")]; int32 var_5414 = const()[name = string("op_5414"), val = int32(1)]; fp16 const_193_promoted_to_fp16 = const()[name = string("const_193_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5417_cast_fp16 = mul(x = x_331_cast_fp16, y = const_193_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool x_333_interleave_0 = const()[name = string("x_333_interleave_0"), val = bool(false)]; tensor x_333_cast_fp16 = concat(axis = var_5414, interleave = x_333_interleave_0, values = (x_331_cast_fp16, var_5417_cast_fp16))[name = string("x_333_cast_fp16")]; tensor out_223_axes_0 = const()[name = string("out_223_axes_0"), val = tensor([1])]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_223_cast_fp16 = layer_norm(axes = out_223_axes_0, epsilon = var_5427_to_fp16, x = x_333_cast_fp16)[name = string("out_223_cast_fp16")]; tensor layer_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551201536)))]; tensor out_225_cast_fp16 = mul(x = out_223_cast_fp16, y = layer_layers_18_post_attention_layernorm_weight_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(1)]; tensor var_5433_cast_fp16_0, tensor var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = out_225_cast_fp16)[name = string("op_5433_cast_fp16")]; string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551205696)))]; tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = var_5438_to_fp16, x = var_5433_cast_fp16_0)[name = string("input_37_cast_fp16")]; tensor var_5449_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_5449_cast_fp16")]; string var_5454_pad_type_0 = const()[name = string("op_5454_pad_type_0"), val = string("valid")]; tensor var_5454_strides_0 = const()[name = string("op_5454_strides_0"), val = tensor([1, 1])]; tensor var_5454_pad_0 = const()[name = string("op_5454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_dilations_0 = const()[name = string("op_5454_dilations_0"), val = tensor([1, 1])]; int32 var_5454_groups_0 = const()[name = string("op_5454_groups_0"), val = int32(1)]; tensor var_5437_to_fp16 = const()[name = string("op_5437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559594368)))]; tensor var_5454_cast_fp16 = conv(dilations = var_5454_dilations_0, groups = var_5454_groups_0, pad = var_5454_pad_0, pad_type = var_5454_pad_type_0, strides = var_5454_strides_0, weight = var_5437_to_fp16, x = var_5433_cast_fp16_0)[name = string("op_5454_cast_fp16")]; tensor x_339_cast_fp16 = mul(x = var_5449_cast_fp16, y = var_5454_cast_fp16)[name = string("x_339_cast_fp16")]; string hidden_states_113_pad_type_0 = const()[name = string("hidden_states_113_pad_type_0"), val = string("valid")]; tensor hidden_states_113_strides_0 = const()[name = string("hidden_states_113_strides_0"), val = tensor([1, 1])]; tensor hidden_states_113_pad_0 = const()[name = string("hidden_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_113_dilations_0 = const()[name = string("hidden_states_113_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_113_groups_0 = const()[name = string("hidden_states_113_groups_0"), val = int32(1)]; tensor var_5436_to_fp16 = const()[name = string("op_5436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567983040)))]; tensor hidden_states_113_cast_fp16 = conv(dilations = hidden_states_113_dilations_0, groups = hidden_states_113_groups_0, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = hidden_states_113_strides_0, weight = var_5436_to_fp16, x = x_339_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor x_341_cast_fp16 = add(x = x_331_cast_fp16, y = hidden_states_113_cast_fp16)[name = string("x_341_cast_fp16")]; int32 var_5467 = const()[name = string("op_5467"), val = int32(1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5470_cast_fp16 = mul(x = x_341_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_5470_cast_fp16")]; bool x_343_interleave_0 = const()[name = string("x_343_interleave_0"), val = bool(false)]; tensor x_343_cast_fp16 = concat(axis = var_5467, interleave = x_343_interleave_0, values = (x_341_cast_fp16, var_5470_cast_fp16))[name = string("x_343_cast_fp16")]; tensor out_229_axes_0 = const()[name = string("out_229_axes_0"), val = tensor([1])]; fp16 var_5480_to_fp16 = const()[name = string("op_5480_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_229_cast_fp16 = layer_norm(axes = out_229_axes_0, epsilon = var_5480_to_fp16, x = x_343_cast_fp16)[name = string("out_229_cast_fp16")]; tensor layer_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576371712)))]; tensor out_231_cast_fp16 = mul(x = out_229_cast_fp16, y = layer_layers_19_input_layernorm_weight_to_fp16)[name = string("out_231_cast_fp16")]; tensor var_5486_split_sizes_0 = const()[name = string("op_5486_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5486_axis_0 = const()[name = string("op_5486_axis_0"), val = int32(1)]; tensor var_5486_cast_fp16_0, tensor var_5486_cast_fp16_1 = split(axis = var_5486_axis_0, split_sizes = var_5486_split_sizes_0, x = out_231_cast_fp16)[name = string("op_5486_cast_fp16")]; string query_states_77_pad_type_0 = const()[name = string("query_states_77_pad_type_0"), val = string("valid")]; tensor query_states_77_strides_0 = const()[name = string("query_states_77_strides_0"), val = tensor([1, 1])]; tensor query_states_77_pad_0 = const()[name = string("query_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_77_dilations_0 = const()[name = string("query_states_77_dilations_0"), val = tensor([1, 1])]; int32 query_states_77_groups_0 = const()[name = string("query_states_77_groups_0"), val = int32(1)]; tensor var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576375872)))]; tensor query_states_77_cast_fp16 = conv(dilations = query_states_77_dilations_0, groups = query_states_77_groups_0, pad = query_states_77_pad_0, pad_type = query_states_77_pad_type_0, strides = query_states_77_strides_0, weight = var_5508_to_fp16, x = var_5486_cast_fp16_0)[name = string("query_states_77_cast_fp16")]; string key_states_77_pad_type_0 = const()[name = string("key_states_77_pad_type_0"), val = string("valid")]; tensor key_states_77_strides_0 = const()[name = string("key_states_77_strides_0"), val = tensor([1, 1])]; tensor key_states_77_pad_0 = const()[name = string("key_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_77_dilations_0 = const()[name = string("key_states_77_dilations_0"), val = tensor([1, 1])]; int32 key_states_77_groups_0 = const()[name = string("key_states_77_groups_0"), val = int32(1)]; tensor var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578473088)))]; tensor key_states_77_cast_fp16 = conv(dilations = key_states_77_dilations_0, groups = key_states_77_groups_0, pad = key_states_77_pad_0, pad_type = key_states_77_pad_type_0, strides = key_states_77_strides_0, weight = var_5519_to_fp16, x = var_5486_cast_fp16_0)[name = string("key_states_77_cast_fp16")]; string value_states_77_pad_type_0 = const()[name = string("value_states_77_pad_type_0"), val = string("valid")]; tensor value_states_77_strides_0 = const()[name = string("value_states_77_strides_0"), val = tensor([1, 1])]; tensor value_states_77_pad_0 = const()[name = string("value_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_77_dilations_0 = const()[name = string("value_states_77_dilations_0"), val = tensor([1, 1])]; int32 value_states_77_groups_0 = const()[name = string("value_states_77_groups_0"), val = int32(1)]; tensor var_5530_to_fp16 = const()[name = string("op_5530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578735296)))]; tensor value_states_77_cast_fp16 = conv(dilations = value_states_77_dilations_0, groups = value_states_77_groups_0, pad = value_states_77_pad_0, pad_type = value_states_77_pad_type_0, strides = value_states_77_strides_0, weight = var_5530_to_fp16, x = var_5486_cast_fp16_0)[name = string("value_states_77_cast_fp16")]; tensor var_5538 = const()[name = string("op_5538"), val = tensor([1, 16, 64, 128])]; tensor embed_77_cast_fp16 = reshape(shape = var_5538, x = query_states_77_cast_fp16)[name = string("embed_77_cast_fp16")]; tensor var_5542 = const()[name = string("op_5542"), val = tensor([1, 2, 64, 128])]; tensor var_5543_cast_fp16 = reshape(shape = var_5542, x = key_states_77_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor embed_79_perm_0 = const()[name = string("embed_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([1, 2, 64, 128])]; tensor var_5550_cast_fp16 = reshape(shape = var_5549, x = value_states_77_cast_fp16)[name = string("op_5550_cast_fp16")]; tensor value_states_79_perm_0 = const()[name = string("value_states_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5554_cast_fp16 = mul(x = embed_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5554_cast_fp16")]; tensor var_5555_split_sizes_0 = const()[name = string("op_5555_split_sizes_0"), val = tensor([32, 32])]; int32 var_5555_axis_0 = const()[name = string("op_5555_axis_0"), val = int32(-2)]; tensor var_5555_cast_fp16_0, tensor var_5555_cast_fp16_1 = split(axis = var_5555_axis_0, split_sizes = var_5555_split_sizes_0, x = embed_77_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 const_197_promoted_to_fp16 = const()[name = string("const_197_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5557_cast_fp16 = mul(x = var_5555_cast_fp16_1, y = const_197_promoted_to_fp16)[name = string("op_5557_cast_fp16")]; int32 var_5559 = const()[name = string("op_5559"), val = int32(-2)]; bool var_5560_interleave_0 = const()[name = string("op_5560_interleave_0"), val = bool(false)]; tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5557_cast_fp16, var_5555_cast_fp16_0))[name = string("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = mul(x = var_5560_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5561_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_5554_cast_fp16, y = var_5561_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor embed_79_cast_fp16 = transpose(perm = embed_79_perm_0, x = var_5543_cast_fp16)[name = string("transpose_14")]; tensor var_5564_cast_fp16 = mul(x = embed_79_cast_fp16, y = cos_cast_fp16)[name = string("op_5564_cast_fp16")]; tensor var_5565_split_sizes_0 = const()[name = string("op_5565_split_sizes_0"), val = tensor([32, 32])]; int32 var_5565_axis_0 = const()[name = string("op_5565_axis_0"), val = int32(-1)]; tensor var_5565_cast_fp16_0, tensor var_5565_cast_fp16_1 = split(axis = var_5565_axis_0, split_sizes = var_5565_split_sizes_0, x = embed_79_cast_fp16)[name = string("op_5565_cast_fp16")]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5567_cast_fp16 = mul(x = var_5565_cast_fp16_1, y = const_198_promoted_to_fp16)[name = string("op_5567_cast_fp16")]; int32 var_5569 = const()[name = string("op_5569"), val = int32(-1)]; bool var_5570_interleave_0 = const()[name = string("op_5570_interleave_0"), val = bool(false)]; tensor var_5570_cast_fp16 = concat(axis = var_5569, interleave = var_5570_interleave_0, values = (var_5567_cast_fp16, var_5565_cast_fp16_0))[name = string("op_5570_cast_fp16")]; tensor var_5571_cast_fp16 = mul(x = var_5570_cast_fp16, y = sin_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor key_states_79_cast_fp16 = add(x = var_5564_cast_fp16, y = var_5571_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor expand_dims_191 = const()[name = string("expand_dims_191"), val = tensor([19])]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([20])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_191, expand_dims_192, position_id, concat_155_values3_0))[name = string("concat_155")]; tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (expand_dims_194, concat_156_values1_0, var_426, concat_156_values3_0))[name = string("concat_156")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = key_states_79_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_156_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_156")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_79_cast_fp16 = transpose(perm = value_states_79_perm_0, x = var_5550_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = value_states_79_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_157_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_157")]; tensor var_5614_begin_0 = const()[name = string("op_5614_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5614_end_0 = const()[name = string("op_5614_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5614_end_mask_0 = const()[name = string("op_5614_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5614_cast_fp16 = slice_by_index(begin = var_5614_begin_0, end = var_5614_end_0, end_mask = var_5614_end_mask_0, x = coreml_update_state_86)[name = string("op_5614_cast_fp16")]; tensor tile_38 = const()[name = string("tile_38"), val = tensor([1, 1])]; int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(1)]; tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = tile_38, x = var_5614_cast_fp16)[name = string("op_5617_cast_fp16")]; tensor var_5624_begin_0 = const()[name = string("op_5624_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5624_end_0 = const()[name = string("op_5624_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5624_end_mask_0 = const()[name = string("op_5624_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = coreml_update_state_87)[name = string("op_5624_cast_fp16")]; tensor tile_39 = const()[name = string("tile_39"), val = tensor([1, 1])]; int32 var_5627_axis_0 = const()[name = string("op_5627_axis_0"), val = int32(1)]; tensor var_5627_cast_fp16_0, tensor var_5627_cast_fp16_1 = split(axis = var_5627_axis_0, split_sizes = tile_39, x = var_5624_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5630_split_sizes_0 = const()[name = string("op_5630_split_sizes_0"), val = tensor([8, 8])]; int32 var_5630_axis_0 = const()[name = string("op_5630_axis_0"), val = int32(1)]; tensor var_5630_cast_fp16_0, tensor var_5630_cast_fp16_1 = split(axis = var_5630_axis_0, split_sizes = var_5630_split_sizes_0, x = query_states_79_cast_fp16)[name = string("op_5630_cast_fp16")]; bool attn_weights_305_transpose_x_0 = const()[name = string("attn_weights_305_transpose_x_0"), val = bool(false)]; bool attn_weights_305_transpose_y_0 = const()[name = string("attn_weights_305_transpose_y_0"), val = bool(false)]; tensor attn_weights_305_cast_fp16 = matmul(transpose_x = attn_weights_305_transpose_x_0, transpose_y = attn_weights_305_transpose_y_0, x = var_5617_cast_fp16_0, y = var_5630_cast_fp16_0)[name = string("attn_weights_305_cast_fp16")]; fp16 _inversed_attn_weights_307_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_307_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_307_cast_fp16 = mul(x = attn_weights_305_cast_fp16, y = _inversed_attn_weights_307_y_0_to_fp16)[name = string("_inversed_attn_weights_307_cast_fp16")]; tensor attn_weights_309_cast_fp16 = add(x = _inversed_attn_weights_307_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_309_cast_fp16")]; int32 var_5637 = const()[name = string("op_5637"), val = int32(2)]; tensor attn_weights_311_cast_fp16 = softmax(axis = var_5637, x = attn_weights_309_cast_fp16)[name = string("attn_weights_311_cast_fp16")]; bool var_5643_transpose_x_1 = const()[name = string("op_5643_transpose_x_1"), val = bool(true)]; bool var_5643_transpose_y_1 = const()[name = string("op_5643_transpose_y_1"), val = bool(false)]; tensor var_5643_cast_fp16 = matmul(transpose_x = var_5643_transpose_x_1, transpose_y = var_5643_transpose_y_1, x = attn_weights_311_cast_fp16, y = var_5627_cast_fp16_0)[name = string("op_5643_cast_fp16")]; bool attn_weights_313_transpose_x_0 = const()[name = string("attn_weights_313_transpose_x_0"), val = bool(false)]; bool attn_weights_313_transpose_y_0 = const()[name = string("attn_weights_313_transpose_y_0"), val = bool(false)]; tensor attn_weights_313_cast_fp16 = matmul(transpose_x = attn_weights_313_transpose_x_0, transpose_y = attn_weights_313_transpose_y_0, x = var_5617_cast_fp16_1, y = var_5630_cast_fp16_1)[name = string("attn_weights_313_cast_fp16")]; fp16 _inversed_attn_weights_315_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_315_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_315_cast_fp16 = mul(x = attn_weights_313_cast_fp16, y = _inversed_attn_weights_315_y_0_to_fp16)[name = string("_inversed_attn_weights_315_cast_fp16")]; tensor attn_weights_317_cast_fp16 = add(x = _inversed_attn_weights_315_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_317_cast_fp16")]; int32 var_5649 = const()[name = string("op_5649"), val = int32(2)]; tensor attn_weights_319_cast_fp16 = softmax(axis = var_5649, x = attn_weights_317_cast_fp16)[name = string("attn_weights_319_cast_fp16")]; bool attn_output_115_transpose_x_1 = const()[name = string("attn_output_115_transpose_x_1"), val = bool(true)]; bool attn_output_115_transpose_y_1 = const()[name = string("attn_output_115_transpose_y_1"), val = bool(false)]; tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_1, transpose_y = attn_output_115_transpose_y_1, x = attn_weights_319_cast_fp16, y = var_5627_cast_fp16_1)[name = string("attn_output_115_cast_fp16")]; int32 var_5657 = const()[name = string("op_5657"), val = int32(1)]; bool attn_output_117_interleave_0 = const()[name = string("attn_output_117_interleave_0"), val = bool(false)]; tensor attn_output_117_cast_fp16 = concat(axis = var_5657, interleave = attn_output_117_interleave_0, values = (var_5643_cast_fp16, attn_output_115_cast_fp16))[name = string("attn_output_117_cast_fp16")]; tensor var_5661_perm_0 = const()[name = string("op_5661_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 1024, 1, 128])]; tensor var_5661_cast_fp16 = transpose(perm = var_5661_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_12")]; tensor x_347_cast_fp16 = reshape(shape = var_5666, x = var_5661_cast_fp16)[name = string("x_347_cast_fp16")]; string hidden_states_117_pad_type_0 = const()[name = string("hidden_states_117_pad_type_0"), val = string("valid")]; tensor hidden_states_117_strides_0 = const()[name = string("hidden_states_117_strides_0"), val = tensor([1, 1])]; tensor hidden_states_117_pad_0 = const()[name = string("hidden_states_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_117_dilations_0 = const()[name = string("hidden_states_117_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_117_groups_0 = const()[name = string("hidden_states_117_groups_0"), val = int32(1)]; tensor var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578997504)))]; tensor hidden_states_117_cast_fp16 = conv(dilations = hidden_states_117_dilations_0, groups = hidden_states_117_groups_0, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = hidden_states_117_strides_0, weight = var_5673_to_fp16, x = x_347_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor x_349_cast_fp16 = add(x = x_341_cast_fp16, y = hidden_states_117_cast_fp16)[name = string("x_349_cast_fp16")]; int32 var_5685 = const()[name = string("op_5685"), val = int32(1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5688_cast_fp16 = mul(x = x_349_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5688_cast_fp16")]; bool x_351_interleave_0 = const()[name = string("x_351_interleave_0"), val = bool(false)]; tensor x_351_cast_fp16 = concat(axis = var_5685, interleave = x_351_interleave_0, values = (x_349_cast_fp16, var_5688_cast_fp16))[name = string("x_351_cast_fp16")]; tensor out_235_axes_0 = const()[name = string("out_235_axes_0"), val = tensor([1])]; fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_235_cast_fp16 = layer_norm(axes = out_235_axes_0, epsilon = var_5698_to_fp16, x = x_351_cast_fp16)[name = string("out_235_cast_fp16")]; tensor layer_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581094720)))]; tensor out_237_cast_fp16 = mul(x = out_235_cast_fp16, y = layer_layers_19_post_attention_layernorm_weight_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(1)]; tensor var_5704_cast_fp16_0, tensor var_5704_cast_fp16_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = out_237_cast_fp16)[name = string("op_5704_cast_fp16")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581098880)))]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = var_5709_to_fp16, x = var_5704_cast_fp16_0)[name = string("input_39_cast_fp16")]; tensor var_5720_cast_fp16 = silu(x = input_39_cast_fp16)[name = string("op_5720_cast_fp16")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5708_to_fp16 = const()[name = string("op_5708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589487552)))]; tensor var_5725_cast_fp16 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = var_5708_to_fp16, x = var_5704_cast_fp16_0)[name = string("op_5725_cast_fp16")]; tensor x_357_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5725_cast_fp16)[name = string("x_357_cast_fp16")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor var_5707_to_fp16 = const()[name = string("op_5707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597876224)))]; tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = var_5707_to_fp16, x = x_357_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor x_359_cast_fp16 = add(x = x_349_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("x_359_cast_fp16")]; int32 var_5738 = const()[name = string("op_5738"), val = int32(1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5741_cast_fp16 = mul(x = x_359_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; bool x_361_interleave_0 = const()[name = string("x_361_interleave_0"), val = bool(false)]; tensor x_361_cast_fp16 = concat(axis = var_5738, interleave = x_361_interleave_0, values = (x_359_cast_fp16, var_5741_cast_fp16))[name = string("x_361_cast_fp16")]; tensor out_241_axes_0 = const()[name = string("out_241_axes_0"), val = tensor([1])]; fp16 var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_241_cast_fp16 = layer_norm(axes = out_241_axes_0, epsilon = var_5751_to_fp16, x = x_361_cast_fp16)[name = string("out_241_cast_fp16")]; tensor layer_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606264896)))]; tensor out_243_cast_fp16 = mul(x = out_241_cast_fp16, y = layer_layers_20_input_layernorm_weight_to_fp16)[name = string("out_243_cast_fp16")]; tensor var_5757_split_sizes_0 = const()[name = string("op_5757_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5757_axis_0 = const()[name = string("op_5757_axis_0"), val = int32(1)]; tensor var_5757_cast_fp16_0, tensor var_5757_cast_fp16_1 = split(axis = var_5757_axis_0, split_sizes = var_5757_split_sizes_0, x = out_243_cast_fp16)[name = string("op_5757_cast_fp16")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor var_5779_to_fp16 = const()[name = string("op_5779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606269056)))]; tensor query_states_81_cast_fp16 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = var_5779_to_fp16, x = var_5757_cast_fp16_0)[name = string("query_states_81_cast_fp16")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor var_5790_to_fp16 = const()[name = string("op_5790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608366272)))]; tensor key_states_81_cast_fp16 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = var_5790_to_fp16, x = var_5757_cast_fp16_0)[name = string("key_states_81_cast_fp16")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608628480)))]; tensor value_states_81_cast_fp16 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = var_5801_to_fp16, x = var_5757_cast_fp16_0)[name = string("value_states_81_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 16, 64, 128])]; tensor embed_81_cast_fp16 = reshape(shape = var_5809, x = query_states_81_cast_fp16)[name = string("embed_81_cast_fp16")]; tensor var_5813 = const()[name = string("op_5813"), val = tensor([1, 2, 64, 128])]; tensor var_5814_cast_fp16 = reshape(shape = var_5813, x = key_states_81_cast_fp16)[name = string("op_5814_cast_fp16")]; tensor embed_83_perm_0 = const()[name = string("embed_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, 2, 64, 128])]; tensor var_5821_cast_fp16 = reshape(shape = var_5820, x = value_states_81_cast_fp16)[name = string("op_5821_cast_fp16")]; tensor value_states_83_perm_0 = const()[name = string("value_states_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5825_cast_fp16 = mul(x = embed_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5825_cast_fp16")]; tensor var_5826_split_sizes_0 = const()[name = string("op_5826_split_sizes_0"), val = tensor([32, 32])]; int32 var_5826_axis_0 = const()[name = string("op_5826_axis_0"), val = int32(-2)]; tensor var_5826_cast_fp16_0, tensor var_5826_cast_fp16_1 = split(axis = var_5826_axis_0, split_sizes = var_5826_split_sizes_0, x = embed_81_cast_fp16)[name = string("op_5826_cast_fp16")]; fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5828_cast_fp16 = mul(x = var_5826_cast_fp16_1, y = const_207_promoted_to_fp16)[name = string("op_5828_cast_fp16")]; int32 var_5830 = const()[name = string("op_5830"), val = int32(-2)]; bool var_5831_interleave_0 = const()[name = string("op_5831_interleave_0"), val = bool(false)]; tensor var_5831_cast_fp16 = concat(axis = var_5830, interleave = var_5831_interleave_0, values = (var_5828_cast_fp16, var_5826_cast_fp16_0))[name = string("op_5831_cast_fp16")]; tensor var_5832_cast_fp16 = mul(x = var_5831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5832_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_5825_cast_fp16, y = var_5832_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor embed_83_cast_fp16 = transpose(perm = embed_83_perm_0, x = var_5814_cast_fp16)[name = string("transpose_11")]; tensor var_5835_cast_fp16 = mul(x = embed_83_cast_fp16, y = cos_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5836_split_sizes_0 = const()[name = string("op_5836_split_sizes_0"), val = tensor([32, 32])]; int32 var_5836_axis_0 = const()[name = string("op_5836_axis_0"), val = int32(-1)]; tensor var_5836_cast_fp16_0, tensor var_5836_cast_fp16_1 = split(axis = var_5836_axis_0, split_sizes = var_5836_split_sizes_0, x = embed_83_cast_fp16)[name = string("op_5836_cast_fp16")]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5838_cast_fp16 = mul(x = var_5836_cast_fp16_1, y = const_208_promoted_to_fp16)[name = string("op_5838_cast_fp16")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841_cast_fp16 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838_cast_fp16, var_5836_cast_fp16_0))[name = string("op_5841_cast_fp16")]; tensor var_5842_cast_fp16 = mul(x = var_5841_cast_fp16, y = sin_cast_fp16)[name = string("op_5842_cast_fp16")]; tensor key_states_83_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5842_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([20])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([0])]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([21])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_201, expand_dims_202, position_id, concat_163_values3_0))[name = string("concat_163")]; tensor concat_164_values1_0 = const()[name = string("concat_164_values1_0"), val = tensor([0])]; tensor concat_164_values3_0 = const()[name = string("concat_164_values3_0"), val = tensor([0])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_204, concat_164_values1_0, var_426, concat_164_values3_0))[name = string("concat_164")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_158_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_158")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83_cast_fp16 = transpose(perm = value_states_83_perm_0, x = var_5821_cast_fp16)[name = string("transpose_10")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = value_states_83_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_159_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_159")]; tensor var_5885_begin_0 = const()[name = string("op_5885_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5885_end_0 = const()[name = string("op_5885_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5885_end_mask_0 = const()[name = string("op_5885_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5885_cast_fp16 = slice_by_index(begin = var_5885_begin_0, end = var_5885_end_0, end_mask = var_5885_end_mask_0, x = coreml_update_state_88)[name = string("op_5885_cast_fp16")]; tensor tile_40 = const()[name = string("tile_40"), val = tensor([1, 1])]; int32 var_5888_axis_0 = const()[name = string("op_5888_axis_0"), val = int32(1)]; tensor var_5888_cast_fp16_0, tensor var_5888_cast_fp16_1 = split(axis = var_5888_axis_0, split_sizes = tile_40, x = var_5885_cast_fp16)[name = string("op_5888_cast_fp16")]; tensor var_5895_begin_0 = const()[name = string("op_5895_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5895_end_0 = const()[name = string("op_5895_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5895_end_mask_0 = const()[name = string("op_5895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = coreml_update_state_89)[name = string("op_5895_cast_fp16")]; tensor tile_41 = const()[name = string("tile_41"), val = tensor([1, 1])]; int32 var_5898_axis_0 = const()[name = string("op_5898_axis_0"), val = int32(1)]; tensor var_5898_cast_fp16_0, tensor var_5898_cast_fp16_1 = split(axis = var_5898_axis_0, split_sizes = tile_41, x = var_5895_cast_fp16)[name = string("op_5898_cast_fp16")]; tensor var_5901_split_sizes_0 = const()[name = string("op_5901_split_sizes_0"), val = tensor([8, 8])]; int32 var_5901_axis_0 = const()[name = string("op_5901_axis_0"), val = int32(1)]; tensor var_5901_cast_fp16_0, tensor var_5901_cast_fp16_1 = split(axis = var_5901_axis_0, split_sizes = var_5901_split_sizes_0, x = query_states_83_cast_fp16)[name = string("op_5901_cast_fp16")]; bool attn_weights_321_transpose_x_0 = const()[name = string("attn_weights_321_transpose_x_0"), val = bool(false)]; bool attn_weights_321_transpose_y_0 = const()[name = string("attn_weights_321_transpose_y_0"), val = bool(false)]; tensor attn_weights_321_cast_fp16 = matmul(transpose_x = attn_weights_321_transpose_x_0, transpose_y = attn_weights_321_transpose_y_0, x = var_5888_cast_fp16_0, y = var_5901_cast_fp16_0)[name = string("attn_weights_321_cast_fp16")]; fp16 _inversed_attn_weights_323_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_323_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_323_cast_fp16 = mul(x = attn_weights_321_cast_fp16, y = _inversed_attn_weights_323_y_0_to_fp16)[name = string("_inversed_attn_weights_323_cast_fp16")]; tensor attn_weights_325_cast_fp16 = add(x = _inversed_attn_weights_323_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_325_cast_fp16")]; int32 var_5908 = const()[name = string("op_5908"), val = int32(2)]; tensor attn_weights_327_cast_fp16 = softmax(axis = var_5908, x = attn_weights_325_cast_fp16)[name = string("attn_weights_327_cast_fp16")]; bool var_5914_transpose_x_1 = const()[name = string("op_5914_transpose_x_1"), val = bool(true)]; bool var_5914_transpose_y_1 = const()[name = string("op_5914_transpose_y_1"), val = bool(false)]; tensor var_5914_cast_fp16 = matmul(transpose_x = var_5914_transpose_x_1, transpose_y = var_5914_transpose_y_1, x = attn_weights_327_cast_fp16, y = var_5898_cast_fp16_0)[name = string("op_5914_cast_fp16")]; bool attn_weights_329_transpose_x_0 = const()[name = string("attn_weights_329_transpose_x_0"), val = bool(false)]; bool attn_weights_329_transpose_y_0 = const()[name = string("attn_weights_329_transpose_y_0"), val = bool(false)]; tensor attn_weights_329_cast_fp16 = matmul(transpose_x = attn_weights_329_transpose_x_0, transpose_y = attn_weights_329_transpose_y_0, x = var_5888_cast_fp16_1, y = var_5901_cast_fp16_1)[name = string("attn_weights_329_cast_fp16")]; fp16 _inversed_attn_weights_331_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_331_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_331_cast_fp16 = mul(x = attn_weights_329_cast_fp16, y = _inversed_attn_weights_331_y_0_to_fp16)[name = string("_inversed_attn_weights_331_cast_fp16")]; tensor attn_weights_333_cast_fp16 = add(x = _inversed_attn_weights_331_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_333_cast_fp16")]; int32 var_5920 = const()[name = string("op_5920"), val = int32(2)]; tensor attn_weights_335_cast_fp16 = softmax(axis = var_5920, x = attn_weights_333_cast_fp16)[name = string("attn_weights_335_cast_fp16")]; bool attn_output_121_transpose_x_1 = const()[name = string("attn_output_121_transpose_x_1"), val = bool(true)]; bool attn_output_121_transpose_y_1 = const()[name = string("attn_output_121_transpose_y_1"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_1, transpose_y = attn_output_121_transpose_y_1, x = attn_weights_335_cast_fp16, y = var_5898_cast_fp16_1)[name = string("attn_output_121_cast_fp16")]; int32 var_5928 = const()[name = string("op_5928"), val = int32(1)]; bool attn_output_123_interleave_0 = const()[name = string("attn_output_123_interleave_0"), val = bool(false)]; tensor attn_output_123_cast_fp16 = concat(axis = var_5928, interleave = attn_output_123_interleave_0, values = (var_5914_cast_fp16, attn_output_121_cast_fp16))[name = string("attn_output_123_cast_fp16")]; tensor var_5932_perm_0 = const()[name = string("op_5932_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5937 = const()[name = string("op_5937"), val = tensor([1, 1024, 1, 128])]; tensor var_5932_cast_fp16 = transpose(perm = var_5932_perm_0, x = attn_output_123_cast_fp16)[name = string("transpose_9")]; tensor x_365_cast_fp16 = reshape(shape = var_5937, x = var_5932_cast_fp16)[name = string("x_365_cast_fp16")]; string hidden_states_123_pad_type_0 = const()[name = string("hidden_states_123_pad_type_0"), val = string("valid")]; tensor hidden_states_123_strides_0 = const()[name = string("hidden_states_123_strides_0"), val = tensor([1, 1])]; tensor hidden_states_123_pad_0 = const()[name = string("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_123_dilations_0 = const()[name = string("hidden_states_123_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_123_groups_0 = const()[name = string("hidden_states_123_groups_0"), val = int32(1)]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608890688)))]; tensor hidden_states_123_cast_fp16 = conv(dilations = hidden_states_123_dilations_0, groups = hidden_states_123_groups_0, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = hidden_states_123_strides_0, weight = var_5944_to_fp16, x = x_365_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_359_cast_fp16, y = hidden_states_123_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(1)]; fp16 const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5959_cast_fp16 = mul(x = x_367_cast_fp16, y = const_213_promoted_to_fp16)[name = string("op_5959_cast_fp16")]; bool x_369_interleave_0 = const()[name = string("x_369_interleave_0"), val = bool(false)]; tensor x_369_cast_fp16 = concat(axis = var_5956, interleave = x_369_interleave_0, values = (x_367_cast_fp16, var_5959_cast_fp16))[name = string("x_369_cast_fp16")]; tensor out_247_axes_0 = const()[name = string("out_247_axes_0"), val = tensor([1])]; fp16 var_5969_to_fp16 = const()[name = string("op_5969_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_247_cast_fp16 = layer_norm(axes = out_247_axes_0, epsilon = var_5969_to_fp16, x = x_369_cast_fp16)[name = string("out_247_cast_fp16")]; tensor layer_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610987904)))]; tensor out_249_cast_fp16 = mul(x = out_247_cast_fp16, y = layer_layers_20_post_attention_layernorm_weight_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5975_split_sizes_0 = const()[name = string("op_5975_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5975_axis_0 = const()[name = string("op_5975_axis_0"), val = int32(1)]; tensor var_5975_cast_fp16_0, tensor var_5975_cast_fp16_1 = split(axis = var_5975_axis_0, split_sizes = var_5975_split_sizes_0, x = out_249_cast_fp16)[name = string("op_5975_cast_fp16")]; string input_41_pad_type_0 = const()[name = string("input_41_pad_type_0"), val = string("valid")]; tensor input_41_strides_0 = const()[name = string("input_41_strides_0"), val = tensor([1, 1])]; tensor input_41_pad_0 = const()[name = string("input_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_41_dilations_0 = const()[name = string("input_41_dilations_0"), val = tensor([1, 1])]; int32 input_41_groups_0 = const()[name = string("input_41_groups_0"), val = int32(1)]; tensor var_5980_to_fp16 = const()[name = string("op_5980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610992064)))]; tensor input_41_cast_fp16 = conv(dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = var_5980_to_fp16, x = var_5975_cast_fp16_0)[name = string("input_41_cast_fp16")]; tensor var_5991_cast_fp16 = silu(x = input_41_cast_fp16)[name = string("op_5991_cast_fp16")]; string var_5996_pad_type_0 = const()[name = string("op_5996_pad_type_0"), val = string("valid")]; tensor var_5996_strides_0 = const()[name = string("op_5996_strides_0"), val = tensor([1, 1])]; tensor var_5996_pad_0 = const()[name = string("op_5996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5996_dilations_0 = const()[name = string("op_5996_dilations_0"), val = tensor([1, 1])]; int32 var_5996_groups_0 = const()[name = string("op_5996_groups_0"), val = int32(1)]; tensor var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619380736)))]; tensor var_5996_cast_fp16 = conv(dilations = var_5996_dilations_0, groups = var_5996_groups_0, pad = var_5996_pad_0, pad_type = var_5996_pad_type_0, strides = var_5996_strides_0, weight = var_5979_to_fp16, x = var_5975_cast_fp16_0)[name = string("op_5996_cast_fp16")]; tensor x_375_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5996_cast_fp16)[name = string("x_375_cast_fp16")]; string hidden_states_125_pad_type_0 = const()[name = string("hidden_states_125_pad_type_0"), val = string("valid")]; tensor hidden_states_125_strides_0 = const()[name = string("hidden_states_125_strides_0"), val = tensor([1, 1])]; tensor hidden_states_125_pad_0 = const()[name = string("hidden_states_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_125_dilations_0 = const()[name = string("hidden_states_125_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_125_groups_0 = const()[name = string("hidden_states_125_groups_0"), val = int32(1)]; tensor var_5978_to_fp16 = const()[name = string("op_5978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627769408)))]; tensor hidden_states_125_cast_fp16 = conv(dilations = hidden_states_125_dilations_0, groups = hidden_states_125_groups_0, pad = hidden_states_125_pad_0, pad_type = hidden_states_125_pad_type_0, strides = hidden_states_125_strides_0, weight = var_5978_to_fp16, x = x_375_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = hidden_states_125_cast_fp16)[name = string("x_377_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6012_cast_fp16 = mul(x = x_377_cast_fp16, y = const_214_promoted_to_fp16)[name = string("op_6012_cast_fp16")]; bool x_379_interleave_0 = const()[name = string("x_379_interleave_0"), val = bool(false)]; tensor x_379_cast_fp16 = concat(axis = var_6009, interleave = x_379_interleave_0, values = (x_377_cast_fp16, var_6012_cast_fp16))[name = string("x_379_cast_fp16")]; tensor out_253_axes_0 = const()[name = string("out_253_axes_0"), val = tensor([1])]; fp16 var_6022_to_fp16 = const()[name = string("op_6022_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_253_cast_fp16 = layer_norm(axes = out_253_axes_0, epsilon = var_6022_to_fp16, x = x_379_cast_fp16)[name = string("out_253_cast_fp16")]; tensor layer_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636158080)))]; tensor out_255_cast_fp16 = mul(x = out_253_cast_fp16, y = layer_layers_21_input_layernorm_weight_to_fp16)[name = string("out_255_cast_fp16")]; tensor var_6028_split_sizes_0 = const()[name = string("op_6028_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6028_axis_0 = const()[name = string("op_6028_axis_0"), val = int32(1)]; tensor var_6028_cast_fp16_0, tensor var_6028_cast_fp16_1 = split(axis = var_6028_axis_0, split_sizes = var_6028_split_sizes_0, x = out_255_cast_fp16)[name = string("op_6028_cast_fp16")]; string query_states_85_pad_type_0 = const()[name = string("query_states_85_pad_type_0"), val = string("valid")]; tensor query_states_85_strides_0 = const()[name = string("query_states_85_strides_0"), val = tensor([1, 1])]; tensor query_states_85_pad_0 = const()[name = string("query_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_85_dilations_0 = const()[name = string("query_states_85_dilations_0"), val = tensor([1, 1])]; int32 query_states_85_groups_0 = const()[name = string("query_states_85_groups_0"), val = int32(1)]; tensor var_6050_to_fp16 = const()[name = string("op_6050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636162240)))]; tensor query_states_85_cast_fp16 = conv(dilations = query_states_85_dilations_0, groups = query_states_85_groups_0, pad = query_states_85_pad_0, pad_type = query_states_85_pad_type_0, strides = query_states_85_strides_0, weight = var_6050_to_fp16, x = var_6028_cast_fp16_0)[name = string("query_states_85_cast_fp16")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor var_6061_to_fp16 = const()[name = string("op_6061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638259456)))]; tensor key_states_85_cast_fp16 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = var_6061_to_fp16, x = var_6028_cast_fp16_0)[name = string("key_states_85_cast_fp16")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor var_6072_to_fp16 = const()[name = string("op_6072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638521664)))]; tensor value_states_85_cast_fp16 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = var_6072_to_fp16, x = var_6028_cast_fp16_0)[name = string("value_states_85_cast_fp16")]; tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 16, 64, 128])]; tensor embed_85_cast_fp16 = reshape(shape = var_6080, x = query_states_85_cast_fp16)[name = string("embed_85_cast_fp16")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([1, 2, 64, 128])]; tensor var_6085_cast_fp16 = reshape(shape = var_6084, x = key_states_85_cast_fp16)[name = string("op_6085_cast_fp16")]; tensor embed_87_perm_0 = const()[name = string("embed_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6091 = const()[name = string("op_6091"), val = tensor([1, 2, 64, 128])]; tensor var_6092_cast_fp16 = reshape(shape = var_6091, x = value_states_85_cast_fp16)[name = string("op_6092_cast_fp16")]; tensor value_states_87_perm_0 = const()[name = string("value_states_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6096_cast_fp16 = mul(x = embed_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6097_split_sizes_0 = const()[name = string("op_6097_split_sizes_0"), val = tensor([32, 32])]; int32 var_6097_axis_0 = const()[name = string("op_6097_axis_0"), val = int32(-2)]; tensor var_6097_cast_fp16_0, tensor var_6097_cast_fp16_1 = split(axis = var_6097_axis_0, split_sizes = var_6097_split_sizes_0, x = embed_85_cast_fp16)[name = string("op_6097_cast_fp16")]; fp16 const_217_promoted_to_fp16 = const()[name = string("const_217_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6099_cast_fp16 = mul(x = var_6097_cast_fp16_1, y = const_217_promoted_to_fp16)[name = string("op_6099_cast_fp16")]; int32 var_6101 = const()[name = string("op_6101"), val = int32(-2)]; bool var_6102_interleave_0 = const()[name = string("op_6102_interleave_0"), val = bool(false)]; tensor var_6102_cast_fp16 = concat(axis = var_6101, interleave = var_6102_interleave_0, values = (var_6099_cast_fp16, var_6097_cast_fp16_0))[name = string("op_6102_cast_fp16")]; tensor var_6103_cast_fp16 = mul(x = var_6102_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6103_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_6096_cast_fp16, y = var_6103_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor embed_87_cast_fp16 = transpose(perm = embed_87_perm_0, x = var_6085_cast_fp16)[name = string("transpose_8")]; tensor var_6106_cast_fp16 = mul(x = embed_87_cast_fp16, y = cos_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6107_split_sizes_0 = const()[name = string("op_6107_split_sizes_0"), val = tensor([32, 32])]; int32 var_6107_axis_0 = const()[name = string("op_6107_axis_0"), val = int32(-1)]; tensor var_6107_cast_fp16_0, tensor var_6107_cast_fp16_1 = split(axis = var_6107_axis_0, split_sizes = var_6107_split_sizes_0, x = embed_87_cast_fp16)[name = string("op_6107_cast_fp16")]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6109_cast_fp16 = mul(x = var_6107_cast_fp16_1, y = const_218_promoted_to_fp16)[name = string("op_6109_cast_fp16")]; int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; bool var_6112_interleave_0 = const()[name = string("op_6112_interleave_0"), val = bool(false)]; tensor var_6112_cast_fp16 = concat(axis = var_6111, interleave = var_6112_interleave_0, values = (var_6109_cast_fp16, var_6107_cast_fp16_0))[name = string("op_6112_cast_fp16")]; tensor var_6113_cast_fp16 = mul(x = var_6112_cast_fp16, y = sin_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor key_states_87_cast_fp16 = add(x = var_6106_cast_fp16, y = var_6113_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([21])]; tensor expand_dims_212 = const()[name = string("expand_dims_212"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([22])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_211, expand_dims_212, position_id, concat_171_values3_0))[name = string("concat_171")]; tensor concat_172_values1_0 = const()[name = string("concat_172_values1_0"), val = tensor([0])]; tensor concat_172_values3_0 = const()[name = string("concat_172_values3_0"), val = tensor([0])]; int32 concat_172_axis_0 = const()[name = string("concat_172_axis_0"), val = int32(0)]; bool concat_172_interleave_0 = const()[name = string("concat_172_interleave_0"), val = bool(false)]; tensor concat_172 = concat(axis = concat_172_axis_0, interleave = concat_172_interleave_0, values = (expand_dims_214, concat_172_values1_0, var_426, concat_172_values3_0))[name = string("concat_172")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = key_states_87_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_160_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_160")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87_cast_fp16 = transpose(perm = value_states_87_perm_0, x = var_6092_cast_fp16)[name = string("transpose_7")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = value_states_87_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_161_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_161")]; tensor var_6156_begin_0 = const()[name = string("op_6156_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6156_end_0 = const()[name = string("op_6156_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6156_end_mask_0 = const()[name = string("op_6156_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6156_cast_fp16 = slice_by_index(begin = var_6156_begin_0, end = var_6156_end_0, end_mask = var_6156_end_mask_0, x = coreml_update_state_90)[name = string("op_6156_cast_fp16")]; tensor tile_42 = const()[name = string("tile_42"), val = tensor([1, 1])]; int32 var_6159_axis_0 = const()[name = string("op_6159_axis_0"), val = int32(1)]; tensor var_6159_cast_fp16_0, tensor var_6159_cast_fp16_1 = split(axis = var_6159_axis_0, split_sizes = tile_42, x = var_6156_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor var_6166_begin_0 = const()[name = string("op_6166_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6166_end_0 = const()[name = string("op_6166_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6166_end_mask_0 = const()[name = string("op_6166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6166_cast_fp16 = slice_by_index(begin = var_6166_begin_0, end = var_6166_end_0, end_mask = var_6166_end_mask_0, x = coreml_update_state_91)[name = string("op_6166_cast_fp16")]; tensor tile_43 = const()[name = string("tile_43"), val = tensor([1, 1])]; int32 var_6169_axis_0 = const()[name = string("op_6169_axis_0"), val = int32(1)]; tensor var_6169_cast_fp16_0, tensor var_6169_cast_fp16_1 = split(axis = var_6169_axis_0, split_sizes = tile_43, x = var_6166_cast_fp16)[name = string("op_6169_cast_fp16")]; tensor var_6172_split_sizes_0 = const()[name = string("op_6172_split_sizes_0"), val = tensor([8, 8])]; int32 var_6172_axis_0 = const()[name = string("op_6172_axis_0"), val = int32(1)]; tensor var_6172_cast_fp16_0, tensor var_6172_cast_fp16_1 = split(axis = var_6172_axis_0, split_sizes = var_6172_split_sizes_0, x = query_states_87_cast_fp16)[name = string("op_6172_cast_fp16")]; bool attn_weights_337_transpose_x_0 = const()[name = string("attn_weights_337_transpose_x_0"), val = bool(false)]; bool attn_weights_337_transpose_y_0 = const()[name = string("attn_weights_337_transpose_y_0"), val = bool(false)]; tensor attn_weights_337_cast_fp16 = matmul(transpose_x = attn_weights_337_transpose_x_0, transpose_y = attn_weights_337_transpose_y_0, x = var_6159_cast_fp16_0, y = var_6172_cast_fp16_0)[name = string("attn_weights_337_cast_fp16")]; fp16 _inversed_attn_weights_339_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_339_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_339_cast_fp16 = mul(x = attn_weights_337_cast_fp16, y = _inversed_attn_weights_339_y_0_to_fp16)[name = string("_inversed_attn_weights_339_cast_fp16")]; tensor attn_weights_341_cast_fp16 = add(x = _inversed_attn_weights_339_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_341_cast_fp16")]; int32 var_6179 = const()[name = string("op_6179"), val = int32(2)]; tensor attn_weights_343_cast_fp16 = softmax(axis = var_6179, x = attn_weights_341_cast_fp16)[name = string("attn_weights_343_cast_fp16")]; bool var_6185_transpose_x_1 = const()[name = string("op_6185_transpose_x_1"), val = bool(true)]; bool var_6185_transpose_y_1 = const()[name = string("op_6185_transpose_y_1"), val = bool(false)]; tensor var_6185_cast_fp16 = matmul(transpose_x = var_6185_transpose_x_1, transpose_y = var_6185_transpose_y_1, x = attn_weights_343_cast_fp16, y = var_6169_cast_fp16_0)[name = string("op_6185_cast_fp16")]; bool attn_weights_345_transpose_x_0 = const()[name = string("attn_weights_345_transpose_x_0"), val = bool(false)]; bool attn_weights_345_transpose_y_0 = const()[name = string("attn_weights_345_transpose_y_0"), val = bool(false)]; tensor attn_weights_345_cast_fp16 = matmul(transpose_x = attn_weights_345_transpose_x_0, transpose_y = attn_weights_345_transpose_y_0, x = var_6159_cast_fp16_1, y = var_6172_cast_fp16_1)[name = string("attn_weights_345_cast_fp16")]; fp16 _inversed_attn_weights_347_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_347_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_347_cast_fp16 = mul(x = attn_weights_345_cast_fp16, y = _inversed_attn_weights_347_y_0_to_fp16)[name = string("_inversed_attn_weights_347_cast_fp16")]; tensor attn_weights_349_cast_fp16 = add(x = _inversed_attn_weights_347_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_349_cast_fp16")]; int32 var_6191 = const()[name = string("op_6191"), val = int32(2)]; tensor attn_weights_351_cast_fp16 = softmax(axis = var_6191, x = attn_weights_349_cast_fp16)[name = string("attn_weights_351_cast_fp16")]; bool attn_output_127_transpose_x_1 = const()[name = string("attn_output_127_transpose_x_1"), val = bool(true)]; bool attn_output_127_transpose_y_1 = const()[name = string("attn_output_127_transpose_y_1"), val = bool(false)]; tensor attn_output_127_cast_fp16 = matmul(transpose_x = attn_output_127_transpose_x_1, transpose_y = attn_output_127_transpose_y_1, x = attn_weights_351_cast_fp16, y = var_6169_cast_fp16_1)[name = string("attn_output_127_cast_fp16")]; int32 var_6199 = const()[name = string("op_6199"), val = int32(1)]; bool attn_output_129_interleave_0 = const()[name = string("attn_output_129_interleave_0"), val = bool(false)]; tensor attn_output_129_cast_fp16 = concat(axis = var_6199, interleave = attn_output_129_interleave_0, values = (var_6185_cast_fp16, attn_output_127_cast_fp16))[name = string("attn_output_129_cast_fp16")]; tensor var_6203_perm_0 = const()[name = string("op_6203_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 1024, 1, 128])]; tensor var_6203_cast_fp16 = transpose(perm = var_6203_perm_0, x = attn_output_129_cast_fp16)[name = string("transpose_6")]; tensor x_383_cast_fp16 = reshape(shape = var_6208, x = var_6203_cast_fp16)[name = string("x_383_cast_fp16")]; string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638783872)))]; tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = var_6215_to_fp16, x = x_383_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("x_385_cast_fp16")]; int32 var_6227 = const()[name = string("op_6227"), val = int32(1)]; fp16 const_223_promoted_to_fp16 = const()[name = string("const_223_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6230_cast_fp16 = mul(x = x_385_cast_fp16, y = const_223_promoted_to_fp16)[name = string("op_6230_cast_fp16")]; bool x_387_interleave_0 = const()[name = string("x_387_interleave_0"), val = bool(false)]; tensor x_387_cast_fp16 = concat(axis = var_6227, interleave = x_387_interleave_0, values = (x_385_cast_fp16, var_6230_cast_fp16))[name = string("x_387_cast_fp16")]; tensor out_259_axes_0 = const()[name = string("out_259_axes_0"), val = tensor([1])]; fp16 var_6240_to_fp16 = const()[name = string("op_6240_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_259_cast_fp16 = layer_norm(axes = out_259_axes_0, epsilon = var_6240_to_fp16, x = x_387_cast_fp16)[name = string("out_259_cast_fp16")]; tensor layer_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640881088)))]; tensor out_261_cast_fp16 = mul(x = out_259_cast_fp16, y = layer_layers_21_post_attention_layernorm_weight_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_6246_split_sizes_0 = const()[name = string("op_6246_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6246_axis_0 = const()[name = string("op_6246_axis_0"), val = int32(1)]; tensor var_6246_cast_fp16_0, tensor var_6246_cast_fp16_1 = split(axis = var_6246_axis_0, split_sizes = var_6246_split_sizes_0, x = out_261_cast_fp16)[name = string("op_6246_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640885248)))]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = var_6251_to_fp16, x = var_6246_cast_fp16_0)[name = string("input_43_cast_fp16")]; tensor var_6262_cast_fp16 = silu(x = input_43_cast_fp16)[name = string("op_6262_cast_fp16")]; string var_6267_pad_type_0 = const()[name = string("op_6267_pad_type_0"), val = string("valid")]; tensor var_6267_strides_0 = const()[name = string("op_6267_strides_0"), val = tensor([1, 1])]; tensor var_6267_pad_0 = const()[name = string("op_6267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6267_dilations_0 = const()[name = string("op_6267_dilations_0"), val = tensor([1, 1])]; int32 var_6267_groups_0 = const()[name = string("op_6267_groups_0"), val = int32(1)]; tensor var_6250_to_fp16 = const()[name = string("op_6250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649273920)))]; tensor var_6267_cast_fp16 = conv(dilations = var_6267_dilations_0, groups = var_6267_groups_0, pad = var_6267_pad_0, pad_type = var_6267_pad_type_0, strides = var_6267_strides_0, weight = var_6250_to_fp16, x = var_6246_cast_fp16_0)[name = string("op_6267_cast_fp16")]; tensor x_393_cast_fp16 = mul(x = var_6262_cast_fp16, y = var_6267_cast_fp16)[name = string("x_393_cast_fp16")]; string hidden_states_131_pad_type_0 = const()[name = string("hidden_states_131_pad_type_0"), val = string("valid")]; tensor hidden_states_131_strides_0 = const()[name = string("hidden_states_131_strides_0"), val = tensor([1, 1])]; tensor hidden_states_131_pad_0 = const()[name = string("hidden_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_131_dilations_0 = const()[name = string("hidden_states_131_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_131_groups_0 = const()[name = string("hidden_states_131_groups_0"), val = int32(1)]; tensor var_6249_to_fp16 = const()[name = string("op_6249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657662592)))]; tensor hidden_states_131_cast_fp16 = conv(dilations = hidden_states_131_dilations_0, groups = hidden_states_131_groups_0, pad = hidden_states_131_pad_0, pad_type = hidden_states_131_pad_type_0, strides = hidden_states_131_strides_0, weight = var_6249_to_fp16, x = x_393_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor x_395_cast_fp16 = add(x = x_385_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("x_395_cast_fp16")]; int32 var_6280 = const()[name = string("op_6280"), val = int32(1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6283_cast_fp16 = mul(x = x_395_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_6283_cast_fp16")]; bool x_397_interleave_0 = const()[name = string("x_397_interleave_0"), val = bool(false)]; tensor x_397_cast_fp16 = concat(axis = var_6280, interleave = x_397_interleave_0, values = (x_395_cast_fp16, var_6283_cast_fp16))[name = string("x_397_cast_fp16")]; tensor out_265_axes_0 = const()[name = string("out_265_axes_0"), val = tensor([1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_265_cast_fp16 = layer_norm(axes = out_265_axes_0, epsilon = var_6293_to_fp16, x = x_397_cast_fp16)[name = string("out_265_cast_fp16")]; tensor layer_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666051264)))]; tensor out_267_cast_fp16 = mul(x = out_265_cast_fp16, y = layer_layers_22_input_layernorm_weight_to_fp16)[name = string("out_267_cast_fp16")]; tensor var_6299_split_sizes_0 = const()[name = string("op_6299_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6299_axis_0 = const()[name = string("op_6299_axis_0"), val = int32(1)]; tensor var_6299_cast_fp16_0, tensor var_6299_cast_fp16_1 = split(axis = var_6299_axis_0, split_sizes = var_6299_split_sizes_0, x = out_267_cast_fp16)[name = string("op_6299_cast_fp16")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor var_6321_to_fp16 = const()[name = string("op_6321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666055424)))]; tensor query_states_89_cast_fp16 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = var_6321_to_fp16, x = var_6299_cast_fp16_0)[name = string("query_states_89_cast_fp16")]; string key_states_89_pad_type_0 = const()[name = string("key_states_89_pad_type_0"), val = string("valid")]; tensor key_states_89_strides_0 = const()[name = string("key_states_89_strides_0"), val = tensor([1, 1])]; tensor key_states_89_pad_0 = const()[name = string("key_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_89_dilations_0 = const()[name = string("key_states_89_dilations_0"), val = tensor([1, 1])]; int32 key_states_89_groups_0 = const()[name = string("key_states_89_groups_0"), val = int32(1)]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668152640)))]; tensor key_states_89_cast_fp16 = conv(dilations = key_states_89_dilations_0, groups = key_states_89_groups_0, pad = key_states_89_pad_0, pad_type = key_states_89_pad_type_0, strides = key_states_89_strides_0, weight = var_6332_to_fp16, x = var_6299_cast_fp16_0)[name = string("key_states_89_cast_fp16")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668414848)))]; tensor value_states_89_cast_fp16 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = var_6343_to_fp16, x = var_6299_cast_fp16_0)[name = string("value_states_89_cast_fp16")]; tensor var_6351 = const()[name = string("op_6351"), val = tensor([1, 16, 64, 128])]; tensor embed_89_cast_fp16 = reshape(shape = var_6351, x = query_states_89_cast_fp16)[name = string("embed_89_cast_fp16")]; tensor var_6355 = const()[name = string("op_6355"), val = tensor([1, 2, 64, 128])]; tensor var_6356_cast_fp16 = reshape(shape = var_6355, x = key_states_89_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor embed_91_perm_0 = const()[name = string("embed_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6362 = const()[name = string("op_6362"), val = tensor([1, 2, 64, 128])]; tensor var_6363_cast_fp16 = reshape(shape = var_6362, x = value_states_89_cast_fp16)[name = string("op_6363_cast_fp16")]; tensor value_states_91_perm_0 = const()[name = string("value_states_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6367_cast_fp16 = mul(x = embed_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor var_6368_split_sizes_0 = const()[name = string("op_6368_split_sizes_0"), val = tensor([32, 32])]; int32 var_6368_axis_0 = const()[name = string("op_6368_axis_0"), val = int32(-2)]; tensor var_6368_cast_fp16_0, tensor var_6368_cast_fp16_1 = split(axis = var_6368_axis_0, split_sizes = var_6368_split_sizes_0, x = embed_89_cast_fp16)[name = string("op_6368_cast_fp16")]; fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6370_cast_fp16 = mul(x = var_6368_cast_fp16_1, y = const_227_promoted_to_fp16)[name = string("op_6370_cast_fp16")]; int32 var_6372 = const()[name = string("op_6372"), val = int32(-2)]; bool var_6373_interleave_0 = const()[name = string("op_6373_interleave_0"), val = bool(false)]; tensor var_6373_cast_fp16 = concat(axis = var_6372, interleave = var_6373_interleave_0, values = (var_6370_cast_fp16, var_6368_cast_fp16_0))[name = string("op_6373_cast_fp16")]; tensor var_6374_cast_fp16 = mul(x = var_6373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6374_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_6367_cast_fp16, y = var_6374_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor embed_91_cast_fp16 = transpose(perm = embed_91_perm_0, x = var_6356_cast_fp16)[name = string("transpose_5")]; tensor var_6377_cast_fp16 = mul(x = embed_91_cast_fp16, y = cos_cast_fp16)[name = string("op_6377_cast_fp16")]; tensor var_6378_split_sizes_0 = const()[name = string("op_6378_split_sizes_0"), val = tensor([32, 32])]; int32 var_6378_axis_0 = const()[name = string("op_6378_axis_0"), val = int32(-1)]; tensor var_6378_cast_fp16_0, tensor var_6378_cast_fp16_1 = split(axis = var_6378_axis_0, split_sizes = var_6378_split_sizes_0, x = embed_91_cast_fp16)[name = string("op_6378_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6380_cast_fp16 = mul(x = var_6378_cast_fp16_1, y = const_228_promoted_to_fp16)[name = string("op_6380_cast_fp16")]; int32 var_6382 = const()[name = string("op_6382"), val = int32(-1)]; bool var_6383_interleave_0 = const()[name = string("op_6383_interleave_0"), val = bool(false)]; tensor var_6383_cast_fp16 = concat(axis = var_6382, interleave = var_6383_interleave_0, values = (var_6380_cast_fp16, var_6378_cast_fp16_0))[name = string("op_6383_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6383_cast_fp16, y = sin_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor key_states_91_cast_fp16 = add(x = var_6377_cast_fp16, y = var_6384_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor expand_dims_221 = const()[name = string("expand_dims_221"), val = tensor([22])]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([0])]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([23])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_221, expand_dims_222, position_id, concat_179_values3_0))[name = string("concat_179")]; tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (expand_dims_224, concat_180_values1_0, var_426, concat_180_values3_0))[name = string("concat_180")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = key_states_91_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_162_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_162")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91_cast_fp16 = transpose(perm = value_states_91_perm_0, x = var_6363_cast_fp16)[name = string("transpose_4")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = value_states_91_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_163_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_163")]; tensor var_6427_begin_0 = const()[name = string("op_6427_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6427_end_0 = const()[name = string("op_6427_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6427_end_mask_0 = const()[name = string("op_6427_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6427_cast_fp16 = slice_by_index(begin = var_6427_begin_0, end = var_6427_end_0, end_mask = var_6427_end_mask_0, x = coreml_update_state_92)[name = string("op_6427_cast_fp16")]; tensor tile_44 = const()[name = string("tile_44"), val = tensor([1, 1])]; int32 var_6430_axis_0 = const()[name = string("op_6430_axis_0"), val = int32(1)]; tensor var_6430_cast_fp16_0, tensor var_6430_cast_fp16_1 = split(axis = var_6430_axis_0, split_sizes = tile_44, x = var_6427_cast_fp16)[name = string("op_6430_cast_fp16")]; tensor var_6437_begin_0 = const()[name = string("op_6437_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6437_end_0 = const()[name = string("op_6437_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6437_end_mask_0 = const()[name = string("op_6437_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = coreml_update_state_93)[name = string("op_6437_cast_fp16")]; tensor tile_45 = const()[name = string("tile_45"), val = tensor([1, 1])]; int32 var_6440_axis_0 = const()[name = string("op_6440_axis_0"), val = int32(1)]; tensor var_6440_cast_fp16_0, tensor var_6440_cast_fp16_1 = split(axis = var_6440_axis_0, split_sizes = tile_45, x = var_6437_cast_fp16)[name = string("op_6440_cast_fp16")]; tensor var_6443_split_sizes_0 = const()[name = string("op_6443_split_sizes_0"), val = tensor([8, 8])]; int32 var_6443_axis_0 = const()[name = string("op_6443_axis_0"), val = int32(1)]; tensor var_6443_cast_fp16_0, tensor var_6443_cast_fp16_1 = split(axis = var_6443_axis_0, split_sizes = var_6443_split_sizes_0, x = query_states_91_cast_fp16)[name = string("op_6443_cast_fp16")]; bool attn_weights_353_transpose_x_0 = const()[name = string("attn_weights_353_transpose_x_0"), val = bool(false)]; bool attn_weights_353_transpose_y_0 = const()[name = string("attn_weights_353_transpose_y_0"), val = bool(false)]; tensor attn_weights_353_cast_fp16 = matmul(transpose_x = attn_weights_353_transpose_x_0, transpose_y = attn_weights_353_transpose_y_0, x = var_6430_cast_fp16_0, y = var_6443_cast_fp16_0)[name = string("attn_weights_353_cast_fp16")]; fp16 _inversed_attn_weights_355_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_355_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_355_cast_fp16 = mul(x = attn_weights_353_cast_fp16, y = _inversed_attn_weights_355_y_0_to_fp16)[name = string("_inversed_attn_weights_355_cast_fp16")]; tensor attn_weights_357_cast_fp16 = add(x = _inversed_attn_weights_355_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_357_cast_fp16")]; int32 var_6450 = const()[name = string("op_6450"), val = int32(2)]; tensor attn_weights_359_cast_fp16 = softmax(axis = var_6450, x = attn_weights_357_cast_fp16)[name = string("attn_weights_359_cast_fp16")]; bool var_6456_transpose_x_1 = const()[name = string("op_6456_transpose_x_1"), val = bool(true)]; bool var_6456_transpose_y_1 = const()[name = string("op_6456_transpose_y_1"), val = bool(false)]; tensor var_6456_cast_fp16 = matmul(transpose_x = var_6456_transpose_x_1, transpose_y = var_6456_transpose_y_1, x = attn_weights_359_cast_fp16, y = var_6440_cast_fp16_0)[name = string("op_6456_cast_fp16")]; bool attn_weights_361_transpose_x_0 = const()[name = string("attn_weights_361_transpose_x_0"), val = bool(false)]; bool attn_weights_361_transpose_y_0 = const()[name = string("attn_weights_361_transpose_y_0"), val = bool(false)]; tensor attn_weights_361_cast_fp16 = matmul(transpose_x = attn_weights_361_transpose_x_0, transpose_y = attn_weights_361_transpose_y_0, x = var_6430_cast_fp16_1, y = var_6443_cast_fp16_1)[name = string("attn_weights_361_cast_fp16")]; fp16 _inversed_attn_weights_363_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_363_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_363_cast_fp16 = mul(x = attn_weights_361_cast_fp16, y = _inversed_attn_weights_363_y_0_to_fp16)[name = string("_inversed_attn_weights_363_cast_fp16")]; tensor attn_weights_365_cast_fp16 = add(x = _inversed_attn_weights_363_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_365_cast_fp16")]; int32 var_6462 = const()[name = string("op_6462"), val = int32(2)]; tensor attn_weights_367_cast_fp16 = softmax(axis = var_6462, x = attn_weights_365_cast_fp16)[name = string("attn_weights_367_cast_fp16")]; bool attn_output_133_transpose_x_1 = const()[name = string("attn_output_133_transpose_x_1"), val = bool(true)]; bool attn_output_133_transpose_y_1 = const()[name = string("attn_output_133_transpose_y_1"), val = bool(false)]; tensor attn_output_133_cast_fp16 = matmul(transpose_x = attn_output_133_transpose_x_1, transpose_y = attn_output_133_transpose_y_1, x = attn_weights_367_cast_fp16, y = var_6440_cast_fp16_1)[name = string("attn_output_133_cast_fp16")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(1)]; bool attn_output_135_interleave_0 = const()[name = string("attn_output_135_interleave_0"), val = bool(false)]; tensor attn_output_135_cast_fp16 = concat(axis = var_6470, interleave = attn_output_135_interleave_0, values = (var_6456_cast_fp16, attn_output_133_cast_fp16))[name = string("attn_output_135_cast_fp16")]; tensor var_6474_perm_0 = const()[name = string("op_6474_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6479 = const()[name = string("op_6479"), val = tensor([1, 1024, 1, 128])]; tensor var_6474_cast_fp16 = transpose(perm = var_6474_perm_0, x = attn_output_135_cast_fp16)[name = string("transpose_3")]; tensor x_401_cast_fp16 = reshape(shape = var_6479, x = var_6474_cast_fp16)[name = string("x_401_cast_fp16")]; string hidden_states_135_pad_type_0 = const()[name = string("hidden_states_135_pad_type_0"), val = string("valid")]; tensor hidden_states_135_strides_0 = const()[name = string("hidden_states_135_strides_0"), val = tensor([1, 1])]; tensor hidden_states_135_pad_0 = const()[name = string("hidden_states_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_135_dilations_0 = const()[name = string("hidden_states_135_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_135_groups_0 = const()[name = string("hidden_states_135_groups_0"), val = int32(1)]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668677056)))]; tensor hidden_states_135_cast_fp16 = conv(dilations = hidden_states_135_dilations_0, groups = hidden_states_135_groups_0, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = hidden_states_135_strides_0, weight = var_6486_to_fp16, x = x_401_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor x_403_cast_fp16 = add(x = x_395_cast_fp16, y = hidden_states_135_cast_fp16)[name = string("x_403_cast_fp16")]; int32 var_6498 = const()[name = string("op_6498"), val = int32(1)]; fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6501_cast_fp16 = mul(x = x_403_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_6501_cast_fp16")]; bool x_405_interleave_0 = const()[name = string("x_405_interleave_0"), val = bool(false)]; tensor x_405_cast_fp16 = concat(axis = var_6498, interleave = x_405_interleave_0, values = (x_403_cast_fp16, var_6501_cast_fp16))[name = string("x_405_cast_fp16")]; tensor out_271_axes_0 = const()[name = string("out_271_axes_0"), val = tensor([1])]; fp16 var_6511_to_fp16 = const()[name = string("op_6511_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_271_cast_fp16 = layer_norm(axes = out_271_axes_0, epsilon = var_6511_to_fp16, x = x_405_cast_fp16)[name = string("out_271_cast_fp16")]; tensor layer_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670774272)))]; tensor out_273_cast_fp16 = mul(x = out_271_cast_fp16, y = layer_layers_22_post_attention_layernorm_weight_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_6517_split_sizes_0 = const()[name = string("op_6517_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6517_axis_0 = const()[name = string("op_6517_axis_0"), val = int32(1)]; tensor var_6517_cast_fp16_0, tensor var_6517_cast_fp16_1 = split(axis = var_6517_axis_0, split_sizes = var_6517_split_sizes_0, x = out_273_cast_fp16)[name = string("op_6517_cast_fp16")]; string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670778432)))]; tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = var_6522_to_fp16, x = var_6517_cast_fp16_0)[name = string("input_45_cast_fp16")]; tensor var_6533_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_6533_cast_fp16")]; string var_6538_pad_type_0 = const()[name = string("op_6538_pad_type_0"), val = string("valid")]; tensor var_6538_strides_0 = const()[name = string("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = string("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = string("op_6538_dilations_0"), val = tensor([1, 1])]; int32 var_6538_groups_0 = const()[name = string("op_6538_groups_0"), val = int32(1)]; tensor var_6521_to_fp16 = const()[name = string("op_6521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679167104)))]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = var_6521_to_fp16, x = var_6517_cast_fp16_0)[name = string("op_6538_cast_fp16")]; tensor x_411_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6538_cast_fp16)[name = string("x_411_cast_fp16")]; string hidden_states_137_pad_type_0 = const()[name = string("hidden_states_137_pad_type_0"), val = string("valid")]; tensor hidden_states_137_strides_0 = const()[name = string("hidden_states_137_strides_0"), val = tensor([1, 1])]; tensor hidden_states_137_pad_0 = const()[name = string("hidden_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_137_dilations_0 = const()[name = string("hidden_states_137_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_137_groups_0 = const()[name = string("hidden_states_137_groups_0"), val = int32(1)]; tensor var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687555776)))]; tensor hidden_states_137_cast_fp16 = conv(dilations = hidden_states_137_dilations_0, groups = hidden_states_137_groups_0, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = hidden_states_137_strides_0, weight = var_6520_to_fp16, x = x_411_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor x_413_cast_fp16 = add(x = x_403_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("x_413_cast_fp16")]; int32 var_6551 = const()[name = string("op_6551"), val = int32(1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6554_cast_fp16 = mul(x = x_413_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; bool x_415_interleave_0 = const()[name = string("x_415_interleave_0"), val = bool(false)]; tensor x_415_cast_fp16 = concat(axis = var_6551, interleave = x_415_interleave_0, values = (x_413_cast_fp16, var_6554_cast_fp16))[name = string("x_415_cast_fp16")]; tensor out_277_axes_0 = const()[name = string("out_277_axes_0"), val = tensor([1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_277_cast_fp16 = layer_norm(axes = out_277_axes_0, epsilon = var_6564_to_fp16, x = x_415_cast_fp16)[name = string("out_277_cast_fp16")]; tensor layer_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695944448)))]; tensor out_279_cast_fp16 = mul(x = out_277_cast_fp16, y = layer_layers_23_input_layernorm_weight_to_fp16)[name = string("out_279_cast_fp16")]; tensor var_6570_split_sizes_0 = const()[name = string("op_6570_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6570_axis_0 = const()[name = string("op_6570_axis_0"), val = int32(1)]; tensor var_6570_cast_fp16_0, tensor var_6570_cast_fp16_1 = split(axis = var_6570_axis_0, split_sizes = var_6570_split_sizes_0, x = out_279_cast_fp16)[name = string("op_6570_cast_fp16")]; string query_states_93_pad_type_0 = const()[name = string("query_states_93_pad_type_0"), val = string("valid")]; tensor query_states_93_strides_0 = const()[name = string("query_states_93_strides_0"), val = tensor([1, 1])]; tensor query_states_93_pad_0 = const()[name = string("query_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_93_dilations_0 = const()[name = string("query_states_93_dilations_0"), val = tensor([1, 1])]; int32 query_states_93_groups_0 = const()[name = string("query_states_93_groups_0"), val = int32(1)]; tensor var_6592_to_fp16 = const()[name = string("op_6592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695948608)))]; tensor query_states_93_cast_fp16 = conv(dilations = query_states_93_dilations_0, groups = query_states_93_groups_0, pad = query_states_93_pad_0, pad_type = query_states_93_pad_type_0, strides = query_states_93_strides_0, weight = var_6592_to_fp16, x = var_6570_cast_fp16_0)[name = string("query_states_93_cast_fp16")]; string key_states_93_pad_type_0 = const()[name = string("key_states_93_pad_type_0"), val = string("valid")]; tensor key_states_93_strides_0 = const()[name = string("key_states_93_strides_0"), val = tensor([1, 1])]; tensor key_states_93_pad_0 = const()[name = string("key_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_93_dilations_0 = const()[name = string("key_states_93_dilations_0"), val = tensor([1, 1])]; int32 key_states_93_groups_0 = const()[name = string("key_states_93_groups_0"), val = int32(1)]; tensor var_6603_to_fp16 = const()[name = string("op_6603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698045824)))]; tensor key_states_93_cast_fp16 = conv(dilations = key_states_93_dilations_0, groups = key_states_93_groups_0, pad = key_states_93_pad_0, pad_type = key_states_93_pad_type_0, strides = key_states_93_strides_0, weight = var_6603_to_fp16, x = var_6570_cast_fp16_0)[name = string("key_states_93_cast_fp16")]; string value_states_93_pad_type_0 = const()[name = string("value_states_93_pad_type_0"), val = string("valid")]; tensor value_states_93_strides_0 = const()[name = string("value_states_93_strides_0"), val = tensor([1, 1])]; tensor value_states_93_pad_0 = const()[name = string("value_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_93_dilations_0 = const()[name = string("value_states_93_dilations_0"), val = tensor([1, 1])]; int32 value_states_93_groups_0 = const()[name = string("value_states_93_groups_0"), val = int32(1)]; tensor var_6614_to_fp16 = const()[name = string("op_6614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698308032)))]; tensor value_states_93_cast_fp16 = conv(dilations = value_states_93_dilations_0, groups = value_states_93_groups_0, pad = value_states_93_pad_0, pad_type = value_states_93_pad_type_0, strides = value_states_93_strides_0, weight = var_6614_to_fp16, x = var_6570_cast_fp16_0)[name = string("value_states_93_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([1, 16, 64, 128])]; tensor embed_93_cast_fp16 = reshape(shape = var_6622, x = query_states_93_cast_fp16)[name = string("embed_93_cast_fp16")]; tensor var_6626 = const()[name = string("op_6626"), val = tensor([1, 2, 64, 128])]; tensor var_6627_cast_fp16 = reshape(shape = var_6626, x = key_states_93_cast_fp16)[name = string("op_6627_cast_fp16")]; tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([1, 2, 64, 128])]; tensor var_6634_cast_fp16 = reshape(shape = var_6633, x = value_states_93_cast_fp16)[name = string("op_6634_cast_fp16")]; tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6638_cast_fp16 = mul(x = embed_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6638_cast_fp16")]; tensor var_6639_split_sizes_0 = const()[name = string("op_6639_split_sizes_0"), val = tensor([32, 32])]; int32 var_6639_axis_0 = const()[name = string("op_6639_axis_0"), val = int32(-2)]; tensor var_6639_cast_fp16_0, tensor var_6639_cast_fp16_1 = split(axis = var_6639_axis_0, split_sizes = var_6639_split_sizes_0, x = embed_93_cast_fp16)[name = string("op_6639_cast_fp16")]; fp16 const_237_promoted_to_fp16 = const()[name = string("const_237_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6641_cast_fp16 = mul(x = var_6639_cast_fp16_1, y = const_237_promoted_to_fp16)[name = string("op_6641_cast_fp16")]; int32 var_6643 = const()[name = string("op_6643"), val = int32(-2)]; bool var_6644_interleave_0 = const()[name = string("op_6644_interleave_0"), val = bool(false)]; tensor var_6644_cast_fp16 = concat(axis = var_6643, interleave = var_6644_interleave_0, values = (var_6641_cast_fp16, var_6639_cast_fp16_0))[name = string("op_6644_cast_fp16")]; tensor var_6645_cast_fp16 = mul(x = var_6644_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6645_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_6638_cast_fp16, y = var_6645_cast_fp16)[name = string("query_states_cast_fp16")]; tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_6627_cast_fp16)[name = string("transpose_2")]; tensor var_6648_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_6648_cast_fp16")]; tensor var_6649_split_sizes_0 = const()[name = string("op_6649_split_sizes_0"), val = tensor([32, 32])]; int32 var_6649_axis_0 = const()[name = string("op_6649_axis_0"), val = int32(-1)]; tensor var_6649_cast_fp16_0, tensor var_6649_cast_fp16_1 = split(axis = var_6649_axis_0, split_sizes = var_6649_split_sizes_0, x = embed_cast_fp16)[name = string("op_6649_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6651_cast_fp16 = mul(x = var_6649_cast_fp16_1, y = const_238_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; int32 var_6653 = const()[name = string("op_6653"), val = int32(-1)]; bool var_6654_interleave_0 = const()[name = string("op_6654_interleave_0"), val = bool(false)]; tensor var_6654_cast_fp16 = concat(axis = var_6653, interleave = var_6654_interleave_0, values = (var_6651_cast_fp16, var_6649_cast_fp16_0))[name = string("op_6654_cast_fp16")]; tensor var_6655_cast_fp16 = mul(x = var_6654_cast_fp16, y = sin_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor key_states_cast_fp16 = add(x = var_6648_cast_fp16, y = var_6655_cast_fp16)[name = string("key_states_cast_fp16")]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([23])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([0])]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([24])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_231, expand_dims_232, position_id, concat_187_values3_0))[name = string("concat_187")]; tensor concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = tensor([0])]; tensor concat_188_values3_0 = const()[name = string("concat_188_values3_0"), val = tensor([0])]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (expand_dims_234, concat_188_values1_0, var_426, concat_188_values3_0))[name = string("concat_188")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = key_states_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_164_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_164")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_6634_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = value_states_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_165_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_165")]; tensor var_6698_begin_0 = const()[name = string("op_6698_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6698_end_0 = const()[name = string("op_6698_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6698_end_mask_0 = const()[name = string("op_6698_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6698_cast_fp16 = slice_by_index(begin = var_6698_begin_0, end = var_6698_end_0, end_mask = var_6698_end_mask_0, x = coreml_update_state_94)[name = string("op_6698_cast_fp16")]; tensor tile_46 = const()[name = string("tile_46"), val = tensor([1, 1])]; int32 var_6701_axis_0 = const()[name = string("op_6701_axis_0"), val = int32(1)]; tensor var_6701_cast_fp16_0, tensor var_6701_cast_fp16_1 = split(axis = var_6701_axis_0, split_sizes = tile_46, x = var_6698_cast_fp16)[name = string("op_6701_cast_fp16")]; tensor var_6708_begin_0 = const()[name = string("op_6708_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6708_end_0 = const()[name = string("op_6708_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6708_end_mask_0 = const()[name = string("op_6708_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6708_cast_fp16 = slice_by_index(begin = var_6708_begin_0, end = var_6708_end_0, end_mask = var_6708_end_mask_0, x = coreml_update_state_95)[name = string("op_6708_cast_fp16")]; tensor tile_47 = const()[name = string("tile_47"), val = tensor([1, 1])]; int32 var_6711_axis_0 = const()[name = string("op_6711_axis_0"), val = int32(1)]; tensor var_6711_cast_fp16_0, tensor var_6711_cast_fp16_1 = split(axis = var_6711_axis_0, split_sizes = tile_47, x = var_6708_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor var_6714_split_sizes_0 = const()[name = string("op_6714_split_sizes_0"), val = tensor([8, 8])]; int32 var_6714_axis_0 = const()[name = string("op_6714_axis_0"), val = int32(1)]; tensor var_6714_cast_fp16_0, tensor var_6714_cast_fp16_1 = split(axis = var_6714_axis_0, split_sizes = var_6714_split_sizes_0, x = query_states_cast_fp16)[name = string("op_6714_cast_fp16")]; bool attn_weights_369_transpose_x_0 = const()[name = string("attn_weights_369_transpose_x_0"), val = bool(false)]; bool attn_weights_369_transpose_y_0 = const()[name = string("attn_weights_369_transpose_y_0"), val = bool(false)]; tensor attn_weights_369_cast_fp16 = matmul(transpose_x = attn_weights_369_transpose_x_0, transpose_y = attn_weights_369_transpose_y_0, x = var_6701_cast_fp16_0, y = var_6714_cast_fp16_0)[name = string("attn_weights_369_cast_fp16")]; fp16 _inversed_attn_weights_371_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_371_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_371_cast_fp16 = mul(x = attn_weights_369_cast_fp16, y = _inversed_attn_weights_371_y_0_to_fp16)[name = string("_inversed_attn_weights_371_cast_fp16")]; tensor attn_weights_373_cast_fp16 = add(x = _inversed_attn_weights_371_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_373_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(2)]; tensor attn_weights_375_cast_fp16 = softmax(axis = var_6721, x = attn_weights_373_cast_fp16)[name = string("attn_weights_375_cast_fp16")]; bool var_6727_transpose_x_1 = const()[name = string("op_6727_transpose_x_1"), val = bool(true)]; bool var_6727_transpose_y_1 = const()[name = string("op_6727_transpose_y_1"), val = bool(false)]; tensor var_6727_cast_fp16 = matmul(transpose_x = var_6727_transpose_x_1, transpose_y = var_6727_transpose_y_1, x = attn_weights_375_cast_fp16, y = var_6711_cast_fp16_0)[name = string("op_6727_cast_fp16")]; bool attn_weights_377_transpose_x_0 = const()[name = string("attn_weights_377_transpose_x_0"), val = bool(false)]; bool attn_weights_377_transpose_y_0 = const()[name = string("attn_weights_377_transpose_y_0"), val = bool(false)]; tensor attn_weights_377_cast_fp16 = matmul(transpose_x = attn_weights_377_transpose_x_0, transpose_y = attn_weights_377_transpose_y_0, x = var_6701_cast_fp16_1, y = var_6714_cast_fp16_1)[name = string("attn_weights_377_cast_fp16")]; fp16 _inversed_attn_weights_379_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_379_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_379_cast_fp16 = mul(x = attn_weights_377_cast_fp16, y = _inversed_attn_weights_379_y_0_to_fp16)[name = string("_inversed_attn_weights_379_cast_fp16")]; tensor attn_weights_381_cast_fp16 = add(x = _inversed_attn_weights_379_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_381_cast_fp16")]; int32 var_6733 = const()[name = string("op_6733"), val = int32(2)]; tensor attn_weights_cast_fp16 = softmax(axis = var_6733, x = attn_weights_381_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_139_transpose_x_1 = const()[name = string("attn_output_139_transpose_x_1"), val = bool(true)]; bool attn_output_139_transpose_y_1 = const()[name = string("attn_output_139_transpose_y_1"), val = bool(false)]; tensor attn_output_139_cast_fp16 = matmul(transpose_x = attn_output_139_transpose_x_1, transpose_y = attn_output_139_transpose_y_1, x = attn_weights_cast_fp16, y = var_6711_cast_fp16_1)[name = string("attn_output_139_cast_fp16")]; int32 var_6741 = const()[name = string("op_6741"), val = int32(1)]; bool attn_output_141_interleave_0 = const()[name = string("attn_output_141_interleave_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = concat(axis = var_6741, interleave = attn_output_141_interleave_0, values = (var_6727_cast_fp16, attn_output_139_cast_fp16))[name = string("attn_output_141_cast_fp16")]; tensor var_6745_perm_0 = const()[name = string("op_6745_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 1024, 1, 128])]; tensor var_6745_cast_fp16 = transpose(perm = var_6745_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_0")]; tensor x_419_cast_fp16 = reshape(shape = var_6750, x = var_6745_cast_fp16)[name = string("x_419_cast_fp16")]; string hidden_states_141_pad_type_0 = const()[name = string("hidden_states_141_pad_type_0"), val = string("valid")]; tensor hidden_states_141_strides_0 = const()[name = string("hidden_states_141_strides_0"), val = tensor([1, 1])]; tensor hidden_states_141_pad_0 = const()[name = string("hidden_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_141_dilations_0 = const()[name = string("hidden_states_141_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_141_groups_0 = const()[name = string("hidden_states_141_groups_0"), val = int32(1)]; tensor var_6757_to_fp16 = const()[name = string("op_6757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698570240)))]; tensor hidden_states_141_cast_fp16 = conv(dilations = hidden_states_141_dilations_0, groups = hidden_states_141_groups_0, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = hidden_states_141_strides_0, weight = var_6757_to_fp16, x = x_419_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor x_421_cast_fp16 = add(x = x_413_cast_fp16, y = hidden_states_141_cast_fp16)[name = string("x_421_cast_fp16")]; int32 var_6769 = const()[name = string("op_6769"), val = int32(1)]; fp16 const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6772_cast_fp16 = mul(x = x_421_cast_fp16, y = const_243_promoted_to_fp16)[name = string("op_6772_cast_fp16")]; bool x_423_interleave_0 = const()[name = string("x_423_interleave_0"), val = bool(false)]; tensor x_423_cast_fp16 = concat(axis = var_6769, interleave = x_423_interleave_0, values = (x_421_cast_fp16, var_6772_cast_fp16))[name = string("x_423_cast_fp16")]; tensor out_283_axes_0 = const()[name = string("out_283_axes_0"), val = tensor([1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_283_cast_fp16 = layer_norm(axes = out_283_axes_0, epsilon = var_6782_to_fp16, x = x_423_cast_fp16)[name = string("out_283_cast_fp16")]; tensor layer_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700667456)))]; tensor out_285_cast_fp16 = mul(x = out_283_cast_fp16, y = layer_layers_23_post_attention_layernorm_weight_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_6788_split_sizes_0 = const()[name = string("op_6788_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6788_axis_0 = const()[name = string("op_6788_axis_0"), val = int32(1)]; tensor var_6788_cast_fp16_0, tensor var_6788_cast_fp16_1 = split(axis = var_6788_axis_0, split_sizes = var_6788_split_sizes_0, x = out_285_cast_fp16)[name = string("op_6788_cast_fp16")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor var_6793_to_fp16 = const()[name = string("op_6793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700671616)))]; tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_6793_to_fp16, x = var_6788_cast_fp16_0)[name = string("input_cast_fp16")]; tensor var_6804_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_6804_cast_fp16")]; string var_6809_pad_type_0 = const()[name = string("op_6809_pad_type_0"), val = string("valid")]; tensor var_6809_strides_0 = const()[name = string("op_6809_strides_0"), val = tensor([1, 1])]; tensor var_6809_pad_0 = const()[name = string("op_6809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6809_dilations_0 = const()[name = string("op_6809_dilations_0"), val = tensor([1, 1])]; int32 var_6809_groups_0 = const()[name = string("op_6809_groups_0"), val = int32(1)]; tensor var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709060288)))]; tensor var_6809_cast_fp16 = conv(dilations = var_6809_dilations_0, groups = var_6809_groups_0, pad = var_6809_pad_0, pad_type = var_6809_pad_type_0, strides = var_6809_strides_0, weight = var_6792_to_fp16, x = var_6788_cast_fp16_0)[name = string("op_6809_cast_fp16")]; tensor x_429_cast_fp16 = mul(x = var_6804_cast_fp16, y = var_6809_cast_fp16)[name = string("x_429_cast_fp16")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor var_6791_to_fp16 = const()[name = string("op_6791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717448960)))]; tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_6791_to_fp16, x = x_429_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor x_431_cast_fp16 = add(x = x_421_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_431_cast_fp16")]; int32 var_6822 = const()[name = string("op_6822"), val = int32(1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6825_cast_fp16 = mul(x = x_431_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_6825_cast_fp16")]; bool x_433_interleave_0 = const()[name = string("x_433_interleave_0"), val = bool(false)]; tensor x_433_cast_fp16 = concat(axis = var_6822, interleave = x_433_interleave_0, values = (x_431_cast_fp16, var_6825_cast_fp16))[name = string("x_433_cast_fp16")]; tensor out_289_axes_0 = const()[name = string("out_289_axes_0"), val = tensor([1])]; fp16 var_6835_to_fp16 = const()[name = string("op_6835_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_289_cast_fp16 = layer_norm(axes = out_289_axes_0, epsilon = var_6835_to_fp16, x = x_433_cast_fp16)[name = string("out_289_cast_fp16")]; tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725837632)))]; tensor out_291_cast_fp16 = mul(x = out_289_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_291_cast_fp16")]; tensor var_6841_split_sizes_0 = const()[name = string("op_6841_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6841_axis_0 = const()[name = string("op_6841_axis_0"), val = int32(1)]; tensor output, tensor var_6841_cast_fp16_1 = split(axis = var_6841_axis_0, split_sizes = var_6841_split_sizes_0, x = out_291_cast_fp16)[name = string("op_6841_cast_fp16")]; } -> (output); func length_16(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { tensor var_260 = const()[name = string("op_260"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726399552)))]; tensor position_ids_1 = add(x = var_260, y = position_id)[name = string("position_ids_1")]; int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; tensor var_285 = const()[name = string("op_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; tensor var_292_axes_0 = const()[name = string("op_292_axes_0"), val = tensor([2])]; tensor var_292 = expand_dims(axes = var_292_axes_0, x = position_ids_1)[name = string("op_292")]; tensor var_293 = greater(x = var_285, y = var_292)[name = string("op_293")]; tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_293_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_293)[name = string("cast_245")]; tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_293_to_fp16)[name = string("attention_mask_3_cast_fp16")]; fp16 var_301_promoted_to_fp16 = const()[name = string("op_301_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_302_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_301_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_303_after_broadcast_to_fp16 = const()[name = string("op_303_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726399680)))]; tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_303_after_broadcast_to_fp16, cond = var_302_cast_fp16)[name = string("attention_mask_cast_fp16")]; tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; int32 var_318 = const()[name = string("op_318"), val = int32(1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_321_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_321_cast_fp16")]; bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; tensor x_1_cast_fp16 = concat(axis = var_318, interleave = x_1_interleave_0, values = (inputs_embeds, var_321_cast_fp16))[name = string("x_1_cast_fp16")]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_331_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)]; tensor var_337_cast_fp16_0, tensor var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = out_3_cast_fp16)[name = string("op_337_cast_fp16")]; tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([16])]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor var_359_to_fp16 = const()[name = string("op_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_359_to_fp16, x = var_337_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor var_370_to_fp16 = const()[name = string("op_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_370_to_fp16, x = var_337_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_381_to_fp16, x = var_337_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 16, 64, 16])]; tensor embed_1_cast_fp16 = reshape(shape = var_389, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 2, 64, 16])]; tensor var_394_cast_fp16 = reshape(shape = var_393, x = key_states_1_cast_fp16)[name = string("op_394_cast_fp16")]; tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_400 = const()[name = string("op_400"), val = tensor([1, 2, 64, 16])]; tensor var_401_cast_fp16 = reshape(shape = var_400, x = value_states_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_74")]; tensor var_405_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_405_cast_fp16")]; tensor var_406_split_sizes_0 = const()[name = string("op_406_split_sizes_0"), val = tensor([32, 32])]; int32 var_406_axis_0 = const()[name = string("op_406_axis_0"), val = int32(-2)]; tensor var_406_cast_fp16_0, tensor var_406_cast_fp16_1 = split(axis = var_406_axis_0, split_sizes = var_406_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_406_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_408_cast_fp16 = mul(x = var_406_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_408_cast_fp16")]; int32 var_410 = const()[name = string("op_410"), val = int32(-2)]; bool var_411_interleave_0 = const()[name = string("op_411_interleave_0"), val = bool(false)]; tensor var_411_cast_fp16 = concat(axis = var_410, interleave = var_411_interleave_0, values = (var_408_cast_fp16, var_406_cast_fp16_0))[name = string("op_411_cast_fp16")]; tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_73")]; tensor var_412_cast_fp16 = mul(x = var_411_cast_fp16, y = sin_1_cast_fp16)[name = string("op_412_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_405_cast_fp16, y = var_412_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_394_cast_fp16)[name = string("transpose_71")]; tensor var_415_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_split_sizes_0 = const()[name = string("op_416_split_sizes_0"), val = tensor([32, 32])]; int32 var_416_axis_0 = const()[name = string("op_416_axis_0"), val = int32(-1)]; tensor var_416_cast_fp16_0, tensor var_416_cast_fp16_1 = split(axis = var_416_axis_0, split_sizes = var_416_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_416_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_418_cast_fp16 = mul(x = var_416_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_418_cast_fp16")]; int32 var_420 = const()[name = string("op_420"), val = int32(-1)]; bool var_421_interleave_0 = const()[name = string("op_421_interleave_0"), val = bool(false)]; tensor var_421_cast_fp16 = concat(axis = var_420, interleave = var_421_interleave_0, values = (var_418_cast_fp16, var_416_cast_fp16_0))[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = mul(x = var_421_cast_fp16, y = sin_cast_fp16)[name = string("op_422_cast_fp16")]; tensor key_states_3_cast_fp16 = add(x = var_415_cast_fp16, y = var_422_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_426 = add(x = position_id, y = q_len_1)[name = string("op_426")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_426, concat_4_values3_0))[name = string("concat_4")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_262_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_262")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_401_cast_fp16)[name = string("transpose_70")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_263_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_263")]; tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_48)[name = string("op_465_cast_fp16")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; int32 var_468_axis_0 = const()[name = string("op_468_axis_0"), val = int32(1)]; tensor var_468_cast_fp16_0, tensor var_468_cast_fp16_1 = split(axis = var_468_axis_0, split_sizes = tile_0, x = var_465_cast_fp16)[name = string("op_468_cast_fp16")]; tensor var_475_begin_0 = const()[name = string("op_475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_475_end_0 = const()[name = string("op_475_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_475_end_mask_0 = const()[name = string("op_475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_475_cast_fp16 = slice_by_index(begin = var_475_begin_0, end = var_475_end_0, end_mask = var_475_end_mask_0, x = coreml_update_state_49)[name = string("op_475_cast_fp16")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; int32 var_478_axis_0 = const()[name = string("op_478_axis_0"), val = int32(1)]; tensor var_478_cast_fp16_0, tensor var_478_cast_fp16_1 = split(axis = var_478_axis_0, split_sizes = tile_1, x = var_475_cast_fp16)[name = string("op_478_cast_fp16")]; tensor var_481_split_sizes_0 = const()[name = string("op_481_split_sizes_0"), val = tensor([8, 8])]; int32 var_481_axis_0 = const()[name = string("op_481_axis_0"), val = int32(1)]; tensor var_481_cast_fp16_0, tensor var_481_cast_fp16_1 = split(axis = var_481_axis_0, split_sizes = var_481_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_481_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_468_cast_fp16_0, y = var_481_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_72")]; tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; int32 var_488 = const()[name = string("op_488"), val = int32(2)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_488, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool var_494_transpose_x_1 = const()[name = string("op_494_transpose_x_1"), val = bool(true)]; bool var_494_transpose_y_1 = const()[name = string("op_494_transpose_y_1"), val = bool(false)]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_1, transpose_y = var_494_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_478_cast_fp16_0)[name = string("op_494_cast_fp16")]; bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_468_cast_fp16_1, y = var_481_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; int32 var_500 = const()[name = string("op_500"), val = int32(2)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_500, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_478_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; int32 var_508 = const()[name = string("op_508"), val = int32(1)]; bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; tensor attn_output_3_cast_fp16 = concat(axis = var_508, interleave = attn_output_3_interleave_0, values = (var_494_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; tensor var_512_perm_0 = const()[name = string("op_512_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 1024, 1, 16])]; tensor var_512_cast_fp16 = transpose(perm = var_512_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_69")]; tensor x_5_cast_fp16 = reshape(shape = var_517, x = var_512_cast_fp16)[name = string("x_5_cast_fp16")]; string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_524_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; int32 var_536 = const()[name = string("op_536"), val = int32(1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_539_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_539_cast_fp16")]; bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; tensor x_9_cast_fp16 = concat(axis = var_536, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_539_cast_fp16))[name = string("x_9_cast_fp16")]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_549_to_fp16 = const()[name = string("op_549_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_549_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(1)]; tensor var_555_cast_fp16_0, tensor var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = out_9_cast_fp16)[name = string("op_555_cast_fp16")]; string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_560_to_fp16, x = var_555_cast_fp16_0)[name = string("input_1_cast_fp16")]; tensor var_571_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_571_cast_fp16")]; string var_576_pad_type_0 = const()[name = string("op_576_pad_type_0"), val = string("valid")]; tensor var_576_strides_0 = const()[name = string("op_576_strides_0"), val = tensor([1, 1])]; tensor var_576_pad_0 = const()[name = string("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_dilations_0 = const()[name = string("op_576_dilations_0"), val = tensor([1, 1])]; int32 var_576_groups_0 = const()[name = string("op_576_groups_0"), val = int32(1)]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; tensor var_576_cast_fp16 = conv(dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = var_559_to_fp16, x = var_555_cast_fp16_0)[name = string("op_576_cast_fp16")]; tensor x_15_cast_fp16 = mul(x = var_571_cast_fp16, y = var_576_cast_fp16)[name = string("x_15_cast_fp16")]; string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; tensor var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_558_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_589 = const()[name = string("op_589"), val = int32(1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_592_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_592_cast_fp16")]; bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; tensor x_19_cast_fp16 = concat(axis = var_589, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_592_cast_fp16))[name = string("x_19_cast_fp16")]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_602_to_fp16 = const()[name = string("op_602_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_602_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_608_split_sizes_0 = const()[name = string("op_608_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_608_axis_0 = const()[name = string("op_608_axis_0"), val = int32(1)]; tensor var_608_cast_fp16_0, tensor var_608_cast_fp16_1 = split(axis = var_608_axis_0, split_sizes = var_608_split_sizes_0, x = out_15_cast_fp16)[name = string("op_608_cast_fp16")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_630_to_fp16, x = var_608_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_641_to_fp16, x = var_608_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_652_to_fp16, x = var_608_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 64, 16])]; tensor embed_5_cast_fp16 = reshape(shape = var_660, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 2, 64, 16])]; tensor var_665_cast_fp16 = reshape(shape = var_664, x = key_states_5_cast_fp16)[name = string("op_665_cast_fp16")]; tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 2, 64, 16])]; tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_states_5_cast_fp16)[name = string("op_672_cast_fp16")]; tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_676_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_677_split_sizes_0 = const()[name = string("op_677_split_sizes_0"), val = tensor([32, 32])]; int32 var_677_axis_0 = const()[name = string("op_677_axis_0"), val = int32(-2)]; tensor var_677_cast_fp16_0, tensor var_677_cast_fp16_1 = split(axis = var_677_axis_0, split_sizes = var_677_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_677_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_679_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-2)]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682_cast_fp16 = concat(axis = var_681, interleave = var_682_interleave_0, values = (var_679_cast_fp16, var_677_cast_fp16_0))[name = string("op_682_cast_fp16")]; tensor var_683_cast_fp16 = mul(x = var_682_cast_fp16, y = sin_1_cast_fp16)[name = string("op_683_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_676_cast_fp16, y = var_683_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_665_cast_fp16)[name = string("transpose_68")]; tensor var_686_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_686_cast_fp16")]; tensor var_687_split_sizes_0 = const()[name = string("op_687_split_sizes_0"), val = tensor([32, 32])]; int32 var_687_axis_0 = const()[name = string("op_687_axis_0"), val = int32(-1)]; tensor var_687_cast_fp16_0, tensor var_687_cast_fp16_1 = split(axis = var_687_axis_0, split_sizes = var_687_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_687_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_689_cast_fp16 = mul(x = var_687_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_689_cast_fp16")]; int32 var_691 = const()[name = string("op_691"), val = int32(-1)]; bool var_692_interleave_0 = const()[name = string("op_692_interleave_0"), val = bool(false)]; tensor var_692_cast_fp16 = concat(axis = var_691, interleave = var_692_interleave_0, values = (var_689_cast_fp16, var_687_cast_fp16_0))[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = mul(x = var_692_cast_fp16, y = sin_cast_fp16)[name = string("op_693_cast_fp16")]; tensor key_states_7_cast_fp16 = add(x = var_686_cast_fp16, y = var_693_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_426, concat_12_values3_0))[name = string("concat_12")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_264_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_264")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_672_cast_fp16)[name = string("transpose_67")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_265_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_265")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_50)[name = string("op_736_cast_fp16")]; tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; int32 var_739_axis_0 = const()[name = string("op_739_axis_0"), val = int32(1)]; tensor var_739_cast_fp16_0, tensor var_739_cast_fp16_1 = split(axis = var_739_axis_0, split_sizes = tile_2, x = var_736_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = coreml_update_state_51)[name = string("op_746_cast_fp16")]; tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = tile_3, x = var_746_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_752_split_sizes_0 = const()[name = string("op_752_split_sizes_0"), val = tensor([8, 8])]; int32 var_752_axis_0 = const()[name = string("op_752_axis_0"), val = int32(1)]; tensor var_752_cast_fp16_0, tensor var_752_cast_fp16_1 = split(axis = var_752_axis_0, split_sizes = var_752_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_752_cast_fp16")]; bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_739_cast_fp16_0, y = var_752_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; int32 var_759 = const()[name = string("op_759"), val = int32(2)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_759, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool var_765_transpose_x_1 = const()[name = string("op_765_transpose_x_1"), val = bool(true)]; bool var_765_transpose_y_1 = const()[name = string("op_765_transpose_y_1"), val = bool(false)]; tensor var_765_cast_fp16 = matmul(transpose_x = var_765_transpose_x_1, transpose_y = var_765_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_749_cast_fp16_0)[name = string("op_765_cast_fp16")]; bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_739_cast_fp16_1, y = var_752_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; int32 var_771 = const()[name = string("op_771"), val = int32(2)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_771, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_749_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; int32 var_779 = const()[name = string("op_779"), val = int32(1)]; bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = concat(axis = var_779, interleave = attn_output_9_interleave_0, values = (var_765_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; tensor var_783_perm_0 = const()[name = string("op_783_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1024, 1, 16])]; tensor var_783_cast_fp16 = transpose(perm = var_783_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_66")]; tensor x_23_cast_fp16 = reshape(shape = var_788, x = var_783_cast_fp16)[name = string("x_23_cast_fp16")]; string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_795_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_807 = const()[name = string("op_807"), val = int32(1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; tensor x_27_cast_fp16 = concat(axis = var_807, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_810_cast_fp16))[name = string("x_27_cast_fp16")]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_820_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_826_split_sizes_0 = const()[name = string("op_826_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_826_axis_0 = const()[name = string("op_826_axis_0"), val = int32(1)]; tensor var_826_cast_fp16_0, tensor var_826_cast_fp16_1 = split(axis = var_826_axis_0, split_sizes = var_826_split_sizes_0, x = out_21_cast_fp16)[name = string("op_826_cast_fp16")]; string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_831_to_fp16, x = var_826_cast_fp16_0)[name = string("input_3_cast_fp16")]; tensor var_842_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_842_cast_fp16")]; string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")]; tensor var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor([1, 1])]; tensor var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor([1, 1])]; int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)]; tensor var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; tensor var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = var_830_to_fp16, x = var_826_cast_fp16_0)[name = string("op_847_cast_fp16")]; tensor x_33_cast_fp16 = mul(x = var_842_cast_fp16, y = var_847_cast_fp16)[name = string("x_33_cast_fp16")]; string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_829_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; int32 var_860 = const()[name = string("op_860"), val = int32(1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_863_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_863_cast_fp16")]; bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; tensor x_37_cast_fp16 = concat(axis = var_860, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_863_cast_fp16))[name = string("x_37_cast_fp16")]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_873_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(1)]; tensor var_879_cast_fp16_0, tensor var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = out_27_cast_fp16)[name = string("op_879_cast_fp16")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_901_to_fp16, x = var_879_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_912_to_fp16, x = var_879_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_923_to_fp16, x = var_879_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 16, 64, 16])]; tensor embed_9_cast_fp16 = reshape(shape = var_931, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; tensor var_935 = const()[name = string("op_935"), val = tensor([1, 2, 64, 16])]; tensor var_936_cast_fp16 = reshape(shape = var_935, x = key_states_9_cast_fp16)[name = string("op_936_cast_fp16")]; tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 2, 64, 16])]; tensor var_943_cast_fp16 = reshape(shape = var_942, x = value_states_9_cast_fp16)[name = string("op_943_cast_fp16")]; tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_947_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_947_cast_fp16")]; tensor var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor([32, 32])]; int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-2)]; tensor var_948_cast_fp16_0, tensor var_948_cast_fp16_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_948_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_950_cast_fp16 = mul(x = var_948_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_950_cast_fp16")]; int32 var_952 = const()[name = string("op_952"), val = int32(-2)]; bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)]; tensor var_953_cast_fp16 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950_cast_fp16, var_948_cast_fp16_0))[name = string("op_953_cast_fp16")]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = sin_1_cast_fp16)[name = string("op_954_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_936_cast_fp16)[name = string("transpose_65")]; tensor var_957_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_957_cast_fp16")]; tensor var_958_split_sizes_0 = const()[name = string("op_958_split_sizes_0"), val = tensor([32, 32])]; int32 var_958_axis_0 = const()[name = string("op_958_axis_0"), val = int32(-1)]; tensor var_958_cast_fp16_0, tensor var_958_cast_fp16_1 = split(axis = var_958_axis_0, split_sizes = var_958_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_958_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = var_958_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool var_963_interleave_0 = const()[name = string("op_963_interleave_0"), val = bool(false)]; tensor var_963_cast_fp16 = concat(axis = var_962, interleave = var_963_interleave_0, values = (var_960_cast_fp16, var_958_cast_fp16_0))[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = var_963_cast_fp16, y = sin_cast_fp16)[name = string("op_964_cast_fp16")]; tensor key_states_11_cast_fp16 = add(x = var_957_cast_fp16, y = var_964_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_426, concat_20_values3_0))[name = string("concat_20")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_266_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_266")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_943_cast_fp16)[name = string("transpose_64")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_267_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_267")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_52)[name = string("op_1007_cast_fp16")]; tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; int32 var_1010_axis_0 = const()[name = string("op_1010_axis_0"), val = int32(1)]; tensor var_1010_cast_fp16_0, tensor var_1010_cast_fp16_1 = split(axis = var_1010_axis_0, split_sizes = tile_4, x = var_1007_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = string("op_1017_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1017_end_0 = const()[name = string("op_1017_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1017_end_mask_0 = const()[name = string("op_1017_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = coreml_update_state_53)[name = string("op_1017_cast_fp16")]; tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; int32 var_1020_axis_0 = const()[name = string("op_1020_axis_0"), val = int32(1)]; tensor var_1020_cast_fp16_0, tensor var_1020_cast_fp16_1 = split(axis = var_1020_axis_0, split_sizes = tile_5, x = var_1017_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1023_split_sizes_0 = const()[name = string("op_1023_split_sizes_0"), val = tensor([8, 8])]; int32 var_1023_axis_0 = const()[name = string("op_1023_axis_0"), val = int32(1)]; tensor var_1023_cast_fp16_0, tensor var_1023_cast_fp16_1 = split(axis = var_1023_axis_0, split_sizes = var_1023_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_1023_cast_fp16")]; bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_1010_cast_fp16_0, y = var_1023_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; int32 var_1030 = const()[name = string("op_1030"), val = int32(2)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_1030, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool var_1036_transpose_x_1 = const()[name = string("op_1036_transpose_x_1"), val = bool(true)]; bool var_1036_transpose_y_1 = const()[name = string("op_1036_transpose_y_1"), val = bool(false)]; tensor var_1036_cast_fp16 = matmul(transpose_x = var_1036_transpose_x_1, transpose_y = var_1036_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_1020_cast_fp16_0)[name = string("op_1036_cast_fp16")]; bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_1010_cast_fp16_1, y = var_1023_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; int32 var_1042 = const()[name = string("op_1042"), val = int32(2)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_1042, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_1020_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; int32 var_1050 = const()[name = string("op_1050"), val = int32(1)]; bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; tensor attn_output_15_cast_fp16 = concat(axis = var_1050, interleave = attn_output_15_interleave_0, values = (var_1036_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; tensor var_1054_perm_0 = const()[name = string("op_1054_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1059 = const()[name = string("op_1059"), val = tensor([1, 1024, 1, 16])]; tensor var_1054_cast_fp16 = transpose(perm = var_1054_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_63")]; tensor x_41_cast_fp16 = reshape(shape = var_1059, x = var_1054_cast_fp16)[name = string("x_41_cast_fp16")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_1066_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_1078 = const()[name = string("op_1078"), val = int32(1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1081_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_1081_cast_fp16")]; bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; tensor x_45_cast_fp16 = concat(axis = var_1078, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_1081_cast_fp16))[name = string("x_45_cast_fp16")]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1091_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(1)]; tensor var_1097_cast_fp16_0, tensor var_1097_cast_fp16_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = out_33_cast_fp16)[name = string("op_1097_cast_fp16")]; string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_1102_to_fp16, x = var_1097_cast_fp16_0)[name = string("input_5_cast_fp16")]; tensor var_1113_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_1113_cast_fp16")]; string var_1118_pad_type_0 = const()[name = string("op_1118_pad_type_0"), val = string("valid")]; tensor var_1118_strides_0 = const()[name = string("op_1118_strides_0"), val = tensor([1, 1])]; tensor var_1118_pad_0 = const()[name = string("op_1118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1118_dilations_0 = const()[name = string("op_1118_dilations_0"), val = tensor([1, 1])]; int32 var_1118_groups_0 = const()[name = string("op_1118_groups_0"), val = int32(1)]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; tensor var_1118_cast_fp16 = conv(dilations = var_1118_dilations_0, groups = var_1118_groups_0, pad = var_1118_pad_0, pad_type = var_1118_pad_type_0, strides = var_1118_strides_0, weight = var_1101_to_fp16, x = var_1097_cast_fp16_0)[name = string("op_1118_cast_fp16")]; tensor x_51_cast_fp16 = mul(x = var_1113_cast_fp16, y = var_1118_cast_fp16)[name = string("x_51_cast_fp16")]; string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; tensor var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_1100_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(1)]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1134_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1134_cast_fp16")]; bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; tensor x_55_cast_fp16 = concat(axis = var_1131, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_1134_cast_fp16))[name = string("x_55_cast_fp16")]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1144_to_fp16 = const()[name = string("op_1144_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1144_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_1150_split_sizes_0 = const()[name = string("op_1150_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1150_axis_0 = const()[name = string("op_1150_axis_0"), val = int32(1)]; tensor var_1150_cast_fp16_0, tensor var_1150_cast_fp16_1 = split(axis = var_1150_axis_0, split_sizes = var_1150_split_sizes_0, x = out_39_cast_fp16)[name = string("op_1150_cast_fp16")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_1172_to_fp16, x = var_1150_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1183_to_fp16, x = var_1150_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1194_to_fp16, x = var_1150_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 16, 64, 16])]; tensor embed_13_cast_fp16 = reshape(shape = var_1202, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 2, 64, 16])]; tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = key_states_13_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, 2, 64, 16])]; tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = value_states_13_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1218_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1218_cast_fp16")]; tensor var_1219_split_sizes_0 = const()[name = string("op_1219_split_sizes_0"), val = tensor([32, 32])]; int32 var_1219_axis_0 = const()[name = string("op_1219_axis_0"), val = int32(-2)]; tensor var_1219_cast_fp16_0, tensor var_1219_cast_fp16_1 = split(axis = var_1219_axis_0, split_sizes = var_1219_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1219_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1221_cast_fp16 = mul(x = var_1219_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1221_cast_fp16")]; int32 var_1223 = const()[name = string("op_1223"), val = int32(-2)]; bool var_1224_interleave_0 = const()[name = string("op_1224_interleave_0"), val = bool(false)]; tensor var_1224_cast_fp16 = concat(axis = var_1223, interleave = var_1224_interleave_0, values = (var_1221_cast_fp16, var_1219_cast_fp16_0))[name = string("op_1224_cast_fp16")]; tensor var_1225_cast_fp16 = mul(x = var_1224_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1225_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1207_cast_fp16)[name = string("transpose_62")]; tensor var_1228_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor var_1229_split_sizes_0 = const()[name = string("op_1229_split_sizes_0"), val = tensor([32, 32])]; int32 var_1229_axis_0 = const()[name = string("op_1229_axis_0"), val = int32(-1)]; tensor var_1229_cast_fp16_0, tensor var_1229_cast_fp16_1 = split(axis = var_1229_axis_0, split_sizes = var_1229_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1229_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1231_cast_fp16 = mul(x = var_1229_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1231_cast_fp16")]; int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; bool var_1234_interleave_0 = const()[name = string("op_1234_interleave_0"), val = bool(false)]; tensor var_1234_cast_fp16 = concat(axis = var_1233, interleave = var_1234_interleave_0, values = (var_1231_cast_fp16, var_1229_cast_fp16_0))[name = string("op_1234_cast_fp16")]; tensor var_1235_cast_fp16 = mul(x = var_1234_cast_fp16, y = sin_cast_fp16)[name = string("op_1235_cast_fp16")]; tensor key_states_15_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1235_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_426, concat_28_values3_0))[name = string("concat_28")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_268_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_268")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1214_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_269_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_269")]; tensor var_1278_begin_0 = const()[name = string("op_1278_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1278_end_0 = const()[name = string("op_1278_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1278_end_mask_0 = const()[name = string("op_1278_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1278_cast_fp16 = slice_by_index(begin = var_1278_begin_0, end = var_1278_end_0, end_mask = var_1278_end_mask_0, x = coreml_update_state_54)[name = string("op_1278_cast_fp16")]; tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; int32 var_1281_axis_0 = const()[name = string("op_1281_axis_0"), val = int32(1)]; tensor var_1281_cast_fp16_0, tensor var_1281_cast_fp16_1 = split(axis = var_1281_axis_0, split_sizes = tile_6, x = var_1278_cast_fp16)[name = string("op_1281_cast_fp16")]; tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = coreml_update_state_55)[name = string("op_1288_cast_fp16")]; tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; int32 var_1291_axis_0 = const()[name = string("op_1291_axis_0"), val = int32(1)]; tensor var_1291_cast_fp16_0, tensor var_1291_cast_fp16_1 = split(axis = var_1291_axis_0, split_sizes = tile_7, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_split_sizes_0 = const()[name = string("op_1294_split_sizes_0"), val = tensor([8, 8])]; int32 var_1294_axis_0 = const()[name = string("op_1294_axis_0"), val = int32(1)]; tensor var_1294_cast_fp16_0, tensor var_1294_cast_fp16_1 = split(axis = var_1294_axis_0, split_sizes = var_1294_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1294_cast_fp16")]; bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1281_cast_fp16_0, y = var_1294_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; int32 var_1301 = const()[name = string("op_1301"), val = int32(2)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_1301, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool var_1307_transpose_x_1 = const()[name = string("op_1307_transpose_x_1"), val = bool(true)]; bool var_1307_transpose_y_1 = const()[name = string("op_1307_transpose_y_1"), val = bool(false)]; tensor var_1307_cast_fp16 = matmul(transpose_x = var_1307_transpose_x_1, transpose_y = var_1307_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1291_cast_fp16_0)[name = string("op_1307_cast_fp16")]; bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1281_cast_fp16_1, y = var_1294_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; int32 var_1313 = const()[name = string("op_1313"), val = int32(2)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_1313, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1291_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; int32 var_1321 = const()[name = string("op_1321"), val = int32(1)]; bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = concat(axis = var_1321, interleave = attn_output_21_interleave_0, values = (var_1307_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; tensor var_1325_perm_0 = const()[name = string("op_1325_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 1024, 1, 16])]; tensor var_1325_cast_fp16 = transpose(perm = var_1325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_60")]; tensor x_59_cast_fp16 = reshape(shape = var_1330, x = var_1325_cast_fp16)[name = string("x_59_cast_fp16")]; string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1337_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; int32 var_1349 = const()[name = string("op_1349"), val = int32(1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1352_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1352_cast_fp16")]; bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; tensor x_63_cast_fp16 = concat(axis = var_1349, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1352_cast_fp16))[name = string("x_63_cast_fp16")]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1362_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1368_split_sizes_0 = const()[name = string("op_1368_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1368_axis_0 = const()[name = string("op_1368_axis_0"), val = int32(1)]; tensor var_1368_cast_fp16_0, tensor var_1368_cast_fp16_1 = split(axis = var_1368_axis_0, split_sizes = var_1368_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1368_cast_fp16")]; string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; tensor var_1373_to_fp16 = const()[name = string("op_1373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1373_to_fp16, x = var_1368_cast_fp16_0)[name = string("input_7_cast_fp16")]; tensor var_1384_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1384_cast_fp16")]; string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")]; tensor var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor([1, 1])]; tensor var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor([1, 1])]; int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)]; tensor var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; tensor var_1389_cast_fp16 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = var_1372_to_fp16, x = var_1368_cast_fp16_0)[name = string("op_1389_cast_fp16")]; tensor x_69_cast_fp16 = mul(x = var_1384_cast_fp16, y = var_1389_cast_fp16)[name = string("x_69_cast_fp16")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor var_1371_to_fp16 = const()[name = string("op_1371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1371_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; int32 var_1402 = const()[name = string("op_1402"), val = int32(1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1405_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1405_cast_fp16")]; bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; tensor x_73_cast_fp16 = concat(axis = var_1402, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1405_cast_fp16))[name = string("x_73_cast_fp16")]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1415_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_1421_split_sizes_0 = const()[name = string("op_1421_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1421_axis_0 = const()[name = string("op_1421_axis_0"), val = int32(1)]; tensor var_1421_cast_fp16_0, tensor var_1421_cast_fp16_1 = split(axis = var_1421_axis_0, split_sizes = var_1421_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1421_cast_fp16")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1443_to_fp16, x = var_1421_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; tensor var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1454_to_fp16, x = var_1421_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1465_to_fp16, x = var_1421_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; tensor var_1473 = const()[name = string("op_1473"), val = tensor([1, 16, 64, 16])]; tensor embed_17_cast_fp16 = reshape(shape = var_1473, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; tensor var_1477 = const()[name = string("op_1477"), val = tensor([1, 2, 64, 16])]; tensor var_1478_cast_fp16 = reshape(shape = var_1477, x = key_states_17_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1484 = const()[name = string("op_1484"), val = tensor([1, 2, 64, 16])]; tensor var_1485_cast_fp16 = reshape(shape = var_1484, x = value_states_17_cast_fp16)[name = string("op_1485_cast_fp16")]; tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1489_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor var_1490_split_sizes_0 = const()[name = string("op_1490_split_sizes_0"), val = tensor([32, 32])]; int32 var_1490_axis_0 = const()[name = string("op_1490_axis_0"), val = int32(-2)]; tensor var_1490_cast_fp16_0, tensor var_1490_cast_fp16_1 = split(axis = var_1490_axis_0, split_sizes = var_1490_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1490_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1492_cast_fp16 = mul(x = var_1490_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-2)]; bool var_1495_interleave_0 = const()[name = string("op_1495_interleave_0"), val = bool(false)]; tensor var_1495_cast_fp16 = concat(axis = var_1494, interleave = var_1495_interleave_0, values = (var_1492_cast_fp16, var_1490_cast_fp16_0))[name = string("op_1495_cast_fp16")]; tensor var_1496_cast_fp16 = mul(x = var_1495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1496_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1478_cast_fp16)[name = string("transpose_59")]; tensor var_1499_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_split_sizes_0 = const()[name = string("op_1500_split_sizes_0"), val = tensor([32, 32])]; int32 var_1500_axis_0 = const()[name = string("op_1500_axis_0"), val = int32(-1)]; tensor var_1500_cast_fp16_0, tensor var_1500_cast_fp16_1 = split(axis = var_1500_axis_0, split_sizes = var_1500_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1500_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1502_cast_fp16 = mul(x = var_1500_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1502_cast_fp16")]; int32 var_1504 = const()[name = string("op_1504"), val = int32(-1)]; bool var_1505_interleave_0 = const()[name = string("op_1505_interleave_0"), val = bool(false)]; tensor var_1505_cast_fp16 = concat(axis = var_1504, interleave = var_1505_interleave_0, values = (var_1502_cast_fp16, var_1500_cast_fp16_0))[name = string("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = mul(x = var_1505_cast_fp16, y = sin_cast_fp16)[name = string("op_1506_cast_fp16")]; tensor key_states_19_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1506_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_426, concat_36_values3_0))[name = string("concat_36")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_270_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_270")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1485_cast_fp16)[name = string("transpose_58")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_271_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_271")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1549_end_0 = const()[name = string("op_1549_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = coreml_update_state_56)[name = string("op_1549_cast_fp16")]; tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; int32 var_1552_axis_0 = const()[name = string("op_1552_axis_0"), val = int32(1)]; tensor var_1552_cast_fp16_0, tensor var_1552_cast_fp16_1 = split(axis = var_1552_axis_0, split_sizes = tile_8, x = var_1549_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = string("op_1559_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, x = coreml_update_state_57)[name = string("op_1559_cast_fp16")]; tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(1)]; tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = tile_9, x = var_1559_cast_fp16)[name = string("op_1562_cast_fp16")]; tensor var_1565_split_sizes_0 = const()[name = string("op_1565_split_sizes_0"), val = tensor([8, 8])]; int32 var_1565_axis_0 = const()[name = string("op_1565_axis_0"), val = int32(1)]; tensor var_1565_cast_fp16_0, tensor var_1565_cast_fp16_1 = split(axis = var_1565_axis_0, split_sizes = var_1565_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1565_cast_fp16")]; bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1552_cast_fp16_0, y = var_1565_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; int32 var_1572 = const()[name = string("op_1572"), val = int32(2)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_1572, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(true)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(false)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1562_cast_fp16_0)[name = string("op_1578_cast_fp16")]; bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1552_cast_fp16_1, y = var_1565_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; int32 var_1584 = const()[name = string("op_1584"), val = int32(2)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_1584, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1562_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; int32 var_1592 = const()[name = string("op_1592"), val = int32(1)]; bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; tensor attn_output_27_cast_fp16 = concat(axis = var_1592, interleave = attn_output_27_interleave_0, values = (var_1578_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1024, 1, 16])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_57")]; tensor x_77_cast_fp16 = reshape(shape = var_1601, x = var_1596_cast_fp16)[name = string("x_77_cast_fp16")]; string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1608_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; int32 var_1620 = const()[name = string("op_1620"), val = int32(1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1623_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1623_cast_fp16")]; bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; tensor x_81_cast_fp16 = concat(axis = var_1620, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1623_cast_fp16))[name = string("x_81_cast_fp16")]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1633_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1639_split_sizes_0 = const()[name = string("op_1639_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1639_axis_0 = const()[name = string("op_1639_axis_0"), val = int32(1)]; tensor var_1639_cast_fp16_0, tensor var_1639_cast_fp16_1 = split(axis = var_1639_axis_0, split_sizes = var_1639_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1639_cast_fp16")]; string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1644_to_fp16, x = var_1639_cast_fp16_0)[name = string("input_9_cast_fp16")]; tensor var_1655_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1655_cast_fp16")]; string var_1660_pad_type_0 = const()[name = string("op_1660_pad_type_0"), val = string("valid")]; tensor var_1660_strides_0 = const()[name = string("op_1660_strides_0"), val = tensor([1, 1])]; tensor var_1660_pad_0 = const()[name = string("op_1660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1660_dilations_0 = const()[name = string("op_1660_dilations_0"), val = tensor([1, 1])]; int32 var_1660_groups_0 = const()[name = string("op_1660_groups_0"), val = int32(1)]; tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; tensor var_1660_cast_fp16 = conv(dilations = var_1660_dilations_0, groups = var_1660_groups_0, pad = var_1660_pad_0, pad_type = var_1660_pad_type_0, strides = var_1660_strides_0, weight = var_1643_to_fp16, x = var_1639_cast_fp16_0)[name = string("op_1660_cast_fp16")]; tensor x_87_cast_fp16 = mul(x = var_1655_cast_fp16, y = var_1660_cast_fp16)[name = string("x_87_cast_fp16")]; string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; tensor var_1642_to_fp16 = const()[name = string("op_1642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1642_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; int32 var_1673 = const()[name = string("op_1673"), val = int32(1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1676_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1676_cast_fp16")]; bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; tensor x_91_cast_fp16 = concat(axis = var_1673, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1676_cast_fp16))[name = string("x_91_cast_fp16")]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1686_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_1692_split_sizes_0 = const()[name = string("op_1692_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1692_axis_0 = const()[name = string("op_1692_axis_0"), val = int32(1)]; tensor var_1692_cast_fp16_0, tensor var_1692_cast_fp16_1 = split(axis = var_1692_axis_0, split_sizes = var_1692_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1692_cast_fp16")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor var_1714_to_fp16 = const()[name = string("op_1714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1714_to_fp16, x = var_1692_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1725_to_fp16, x = var_1692_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; tensor var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1736_to_fp16, x = var_1692_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; tensor var_1744 = const()[name = string("op_1744"), val = tensor([1, 16, 64, 16])]; tensor embed_21_cast_fp16 = reshape(shape = var_1744, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 2, 64, 16])]; tensor var_1749_cast_fp16 = reshape(shape = var_1748, x = key_states_21_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor embed_23_perm_0 = const()[name = string("embed_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 2, 64, 16])]; tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_states_21_cast_fp16)[name = string("op_1756_cast_fp16")]; tensor value_states_23_perm_0 = const()[name = string("value_states_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1760_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([32, 32])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-2)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1761_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1763_cast_fp16 = mul(x = var_1761_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1763_cast_fp16")]; int32 var_1765 = const()[name = string("op_1765"), val = int32(-2)]; bool var_1766_interleave_0 = const()[name = string("op_1766_interleave_0"), val = bool(false)]; tensor var_1766_cast_fp16 = concat(axis = var_1765, interleave = var_1766_interleave_0, values = (var_1763_cast_fp16, var_1761_cast_fp16_0))[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = var_1766_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1760_cast_fp16, y = var_1767_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor embed_23_cast_fp16 = transpose(perm = embed_23_perm_0, x = var_1749_cast_fp16)[name = string("transpose_56")]; tensor var_1770_cast_fp16 = mul(x = embed_23_cast_fp16, y = cos_cast_fp16)[name = string("op_1770_cast_fp16")]; tensor var_1771_split_sizes_0 = const()[name = string("op_1771_split_sizes_0"), val = tensor([32, 32])]; int32 var_1771_axis_0 = const()[name = string("op_1771_axis_0"), val = int32(-1)]; tensor var_1771_cast_fp16_0, tensor var_1771_cast_fp16_1 = split(axis = var_1771_axis_0, split_sizes = var_1771_split_sizes_0, x = embed_23_cast_fp16)[name = string("op_1771_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1773_cast_fp16 = mul(x = var_1771_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1773_cast_fp16")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool var_1776_interleave_0 = const()[name = string("op_1776_interleave_0"), val = bool(false)]; tensor var_1776_cast_fp16 = concat(axis = var_1775, interleave = var_1776_interleave_0, values = (var_1773_cast_fp16, var_1771_cast_fp16_0))[name = string("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = mul(x = var_1776_cast_fp16, y = sin_cast_fp16)[name = string("op_1777_cast_fp16")]; tensor key_states_23_cast_fp16 = add(x = var_1770_cast_fp16, y = var_1777_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_426, concat_44_values3_0))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_272_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_272")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_23_cast_fp16 = transpose(perm = value_states_23_perm_0, x = var_1756_cast_fp16)[name = string("transpose_55")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_23_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_273_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_273")]; tensor var_1820_begin_0 = const()[name = string("op_1820_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1820_end_0 = const()[name = string("op_1820_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1820_end_mask_0 = const()[name = string("op_1820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1820_cast_fp16 = slice_by_index(begin = var_1820_begin_0, end = var_1820_end_0, end_mask = var_1820_end_mask_0, x = coreml_update_state_58)[name = string("op_1820_cast_fp16")]; tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; int32 var_1823_axis_0 = const()[name = string("op_1823_axis_0"), val = int32(1)]; tensor var_1823_cast_fp16_0, tensor var_1823_cast_fp16_1 = split(axis = var_1823_axis_0, split_sizes = tile_10, x = var_1820_cast_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_59)[name = string("op_1830_cast_fp16")]; tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; int32 var_1833_axis_0 = const()[name = string("op_1833_axis_0"), val = int32(1)]; tensor var_1833_cast_fp16_0, tensor var_1833_cast_fp16_1 = split(axis = var_1833_axis_0, split_sizes = tile_11, x = var_1830_cast_fp16)[name = string("op_1833_cast_fp16")]; tensor var_1836_split_sizes_0 = const()[name = string("op_1836_split_sizes_0"), val = tensor([8, 8])]; int32 var_1836_axis_0 = const()[name = string("op_1836_axis_0"), val = int32(1)]; tensor var_1836_cast_fp16_0, tensor var_1836_cast_fp16_1 = split(axis = var_1836_axis_0, split_sizes = var_1836_split_sizes_0, x = query_states_23_cast_fp16)[name = string("op_1836_cast_fp16")]; bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1823_cast_fp16_0, y = var_1836_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; int32 var_1843 = const()[name = string("op_1843"), val = int32(2)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_1843, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool var_1849_transpose_x_1 = const()[name = string("op_1849_transpose_x_1"), val = bool(true)]; bool var_1849_transpose_y_1 = const()[name = string("op_1849_transpose_y_1"), val = bool(false)]; tensor var_1849_cast_fp16 = matmul(transpose_x = var_1849_transpose_x_1, transpose_y = var_1849_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1833_cast_fp16_0)[name = string("op_1849_cast_fp16")]; bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1823_cast_fp16_1, y = var_1836_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; int32 var_1855 = const()[name = string("op_1855"), val = int32(2)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_1855, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_95_cast_fp16, y = var_1833_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; int32 var_1863 = const()[name = string("op_1863"), val = int32(1)]; bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = concat(axis = var_1863, interleave = attn_output_33_interleave_0, values = (var_1849_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; tensor var_1867_perm_0 = const()[name = string("op_1867_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, 1024, 1, 16])]; tensor var_1867_cast_fp16 = transpose(perm = var_1867_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_54")]; tensor x_95_cast_fp16 = reshape(shape = var_1872, x = var_1867_cast_fp16)[name = string("x_95_cast_fp16")]; string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; tensor var_1879_to_fp16 = const()[name = string("op_1879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1879_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_1891 = const()[name = string("op_1891"), val = int32(1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; tensor x_99_cast_fp16 = concat(axis = var_1891, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1894_cast_fp16))[name = string("x_99_cast_fp16")]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1904_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1910_split_sizes_0 = const()[name = string("op_1910_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1910_axis_0 = const()[name = string("op_1910_axis_0"), val = int32(1)]; tensor var_1910_cast_fp16_0, tensor var_1910_cast_fp16_1 = split(axis = var_1910_axis_0, split_sizes = var_1910_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1910_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; tensor input_11_cast_fp16 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = var_1915_to_fp16, x = var_1910_cast_fp16_0)[name = string("input_11_cast_fp16")]; tensor var_1926_cast_fp16 = silu(x = input_11_cast_fp16)[name = string("op_1926_cast_fp16")]; string var_1931_pad_type_0 = const()[name = string("op_1931_pad_type_0"), val = string("valid")]; tensor var_1931_strides_0 = const()[name = string("op_1931_strides_0"), val = tensor([1, 1])]; tensor var_1931_pad_0 = const()[name = string("op_1931_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1931_dilations_0 = const()[name = string("op_1931_dilations_0"), val = tensor([1, 1])]; int32 var_1931_groups_0 = const()[name = string("op_1931_groups_0"), val = int32(1)]; tensor var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; tensor var_1931_cast_fp16 = conv(dilations = var_1931_dilations_0, groups = var_1931_groups_0, pad = var_1931_pad_0, pad_type = var_1931_pad_type_0, strides = var_1931_strides_0, weight = var_1914_to_fp16, x = var_1910_cast_fp16_0)[name = string("op_1931_cast_fp16")]; tensor x_105_cast_fp16 = mul(x = var_1926_cast_fp16, y = var_1931_cast_fp16)[name = string("x_105_cast_fp16")]; string hidden_states_35_pad_type_0 = const()[name = string("hidden_states_35_pad_type_0"), val = string("valid")]; tensor hidden_states_35_strides_0 = const()[name = string("hidden_states_35_strides_0"), val = tensor([1, 1])]; tensor hidden_states_35_pad_0 = const()[name = string("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_35_dilations_0 = const()[name = string("hidden_states_35_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_35_groups_0 = const()[name = string("hidden_states_35_groups_0"), val = int32(1)]; tensor var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; tensor hidden_states_35_cast_fp16 = conv(dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = var_1913_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1947_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1947_cast_fp16")]; bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; tensor x_109_cast_fp16 = concat(axis = var_1944, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1947_cast_fp16))[name = string("x_109_cast_fp16")]; tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; fp16 var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1957_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; tensor layer_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_layers_6_input_layernorm_weight_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_1963_split_sizes_0 = const()[name = string("op_1963_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1963_axis_0 = const()[name = string("op_1963_axis_0"), val = int32(1)]; tensor var_1963_cast_fp16_0, tensor var_1963_cast_fp16_1 = split(axis = var_1963_axis_0, split_sizes = var_1963_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1963_cast_fp16")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; tensor query_states_25_cast_fp16 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = var_1985_to_fp16, x = var_1963_cast_fp16_0)[name = string("query_states_25_cast_fp16")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor var_1996_to_fp16 = const()[name = string("op_1996_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189861696)))]; tensor key_states_25_cast_fp16 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = var_1996_to_fp16, x = var_1963_cast_fp16_0)[name = string("key_states_25_cast_fp16")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190123904)))]; tensor value_states_25_cast_fp16 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = var_2007_to_fp16, x = var_1963_cast_fp16_0)[name = string("value_states_25_cast_fp16")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 16, 64, 16])]; tensor embed_25_cast_fp16 = reshape(shape = var_2015, x = query_states_25_cast_fp16)[name = string("embed_25_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 2, 64, 16])]; tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = key_states_25_cast_fp16)[name = string("op_2020_cast_fp16")]; tensor embed_27_perm_0 = const()[name = string("embed_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2026 = const()[name = string("op_2026"), val = tensor([1, 2, 64, 16])]; tensor var_2027_cast_fp16 = reshape(shape = var_2026, x = value_states_25_cast_fp16)[name = string("op_2027_cast_fp16")]; tensor value_states_27_perm_0 = const()[name = string("value_states_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2031_cast_fp16 = mul(x = embed_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_split_sizes_0 = const()[name = string("op_2032_split_sizes_0"), val = tensor([32, 32])]; int32 var_2032_axis_0 = const()[name = string("op_2032_axis_0"), val = int32(-2)]; tensor var_2032_cast_fp16_0, tensor var_2032_cast_fp16_1 = split(axis = var_2032_axis_0, split_sizes = var_2032_split_sizes_0, x = embed_25_cast_fp16)[name = string("op_2032_cast_fp16")]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2034_cast_fp16 = mul(x = var_2032_cast_fp16_1, y = const_67_promoted_to_fp16)[name = string("op_2034_cast_fp16")]; int32 var_2036 = const()[name = string("op_2036"), val = int32(-2)]; bool var_2037_interleave_0 = const()[name = string("op_2037_interleave_0"), val = bool(false)]; tensor var_2037_cast_fp16 = concat(axis = var_2036, interleave = var_2037_interleave_0, values = (var_2034_cast_fp16, var_2032_cast_fp16_0))[name = string("op_2037_cast_fp16")]; tensor var_2038_cast_fp16 = mul(x = var_2037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2038_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor embed_27_cast_fp16 = transpose(perm = embed_27_perm_0, x = var_2020_cast_fp16)[name = string("transpose_53")]; tensor var_2041_cast_fp16 = mul(x = embed_27_cast_fp16, y = cos_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2042_split_sizes_0 = const()[name = string("op_2042_split_sizes_0"), val = tensor([32, 32])]; int32 var_2042_axis_0 = const()[name = string("op_2042_axis_0"), val = int32(-1)]; tensor var_2042_cast_fp16_0, tensor var_2042_cast_fp16_1 = split(axis = var_2042_axis_0, split_sizes = var_2042_split_sizes_0, x = embed_27_cast_fp16)[name = string("op_2042_cast_fp16")]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = var_2042_cast_fp16_1, y = const_68_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; bool var_2047_interleave_0 = const()[name = string("op_2047_interleave_0"), val = bool(false)]; tensor var_2047_cast_fp16 = concat(axis = var_2046, interleave = var_2047_interleave_0, values = (var_2044_cast_fp16, var_2042_cast_fp16_0))[name = string("op_2047_cast_fp16")]; tensor var_2048_cast_fp16 = mul(x = var_2047_cast_fp16, y = sin_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor key_states_27_cast_fp16 = add(x = var_2041_cast_fp16, y = var_2048_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([6])]; tensor expand_dims_62 = const()[name = string("expand_dims_62"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([7])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_61, expand_dims_62, position_id, concat_51_values3_0))[name = string("concat_51")]; tensor concat_52_values1_0 = const()[name = string("concat_52_values1_0"), val = tensor([0])]; tensor concat_52_values3_0 = const()[name = string("concat_52_values3_0"), val = tensor([0])]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (expand_dims_64, concat_52_values1_0, var_426, concat_52_values3_0))[name = string("concat_52")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = key_states_27_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_274_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_274")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27_cast_fp16 = transpose(perm = value_states_27_perm_0, x = var_2027_cast_fp16)[name = string("transpose_52")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = value_states_27_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_275_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_275")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = coreml_update_state_60)[name = string("op_2091_cast_fp16")]; tensor tile_12 = const()[name = string("tile_12"), val = tensor([1, 1])]; int32 var_2094_axis_0 = const()[name = string("op_2094_axis_0"), val = int32(1)]; tensor var_2094_cast_fp16_0, tensor var_2094_cast_fp16_1 = split(axis = var_2094_axis_0, split_sizes = tile_12, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = string("op_2101_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = string("op_2101_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2101_end_mask_0 = const()[name = string("op_2101_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = coreml_update_state_61)[name = string("op_2101_cast_fp16")]; tensor tile_13 = const()[name = string("tile_13"), val = tensor([1, 1])]; int32 var_2104_axis_0 = const()[name = string("op_2104_axis_0"), val = int32(1)]; tensor var_2104_cast_fp16_0, tensor var_2104_cast_fp16_1 = split(axis = var_2104_axis_0, split_sizes = tile_13, x = var_2101_cast_fp16)[name = string("op_2104_cast_fp16")]; tensor var_2107_split_sizes_0 = const()[name = string("op_2107_split_sizes_0"), val = tensor([8, 8])]; int32 var_2107_axis_0 = const()[name = string("op_2107_axis_0"), val = int32(1)]; tensor var_2107_cast_fp16_0, tensor var_2107_cast_fp16_1 = split(axis = var_2107_axis_0, split_sizes = var_2107_split_sizes_0, x = query_states_27_cast_fp16)[name = string("op_2107_cast_fp16")]; bool attn_weights_97_transpose_x_0 = const()[name = string("attn_weights_97_transpose_x_0"), val = bool(false)]; bool attn_weights_97_transpose_y_0 = const()[name = string("attn_weights_97_transpose_y_0"), val = bool(false)]; tensor attn_weights_97_cast_fp16 = matmul(transpose_x = attn_weights_97_transpose_x_0, transpose_y = attn_weights_97_transpose_y_0, x = var_2094_cast_fp16_0, y = var_2107_cast_fp16_0)[name = string("attn_weights_97_cast_fp16")]; fp16 _inversed_attn_weights_99_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_99_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_99_cast_fp16 = mul(x = attn_weights_97_cast_fp16, y = _inversed_attn_weights_99_y_0_to_fp16)[name = string("_inversed_attn_weights_99_cast_fp16")]; tensor attn_weights_101_cast_fp16 = add(x = _inversed_attn_weights_99_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; int32 var_2114 = const()[name = string("op_2114"), val = int32(2)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_2114, x = attn_weights_101_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool var_2120_transpose_x_1 = const()[name = string("op_2120_transpose_x_1"), val = bool(true)]; bool var_2120_transpose_y_1 = const()[name = string("op_2120_transpose_y_1"), val = bool(false)]; tensor var_2120_cast_fp16 = matmul(transpose_x = var_2120_transpose_x_1, transpose_y = var_2120_transpose_y_1, x = attn_weights_103_cast_fp16, y = var_2104_cast_fp16_0)[name = string("op_2120_cast_fp16")]; bool attn_weights_105_transpose_x_0 = const()[name = string("attn_weights_105_transpose_x_0"), val = bool(false)]; bool attn_weights_105_transpose_y_0 = const()[name = string("attn_weights_105_transpose_y_0"), val = bool(false)]; tensor attn_weights_105_cast_fp16 = matmul(transpose_x = attn_weights_105_transpose_x_0, transpose_y = attn_weights_105_transpose_y_0, x = var_2094_cast_fp16_1, y = var_2107_cast_fp16_1)[name = string("attn_weights_105_cast_fp16")]; fp16 _inversed_attn_weights_107_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_107_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_107_cast_fp16 = mul(x = attn_weights_105_cast_fp16, y = _inversed_attn_weights_107_y_0_to_fp16)[name = string("_inversed_attn_weights_107_cast_fp16")]; tensor attn_weights_109_cast_fp16 = add(x = _inversed_attn_weights_107_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_109_cast_fp16")]; int32 var_2126 = const()[name = string("op_2126"), val = int32(2)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_2126, x = attn_weights_109_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(true)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_111_cast_fp16, y = var_2104_cast_fp16_1)[name = string("attn_output_37_cast_fp16")]; int32 var_2134 = const()[name = string("op_2134"), val = int32(1)]; bool attn_output_39_interleave_0 = const()[name = string("attn_output_39_interleave_0"), val = bool(false)]; tensor attn_output_39_cast_fp16 = concat(axis = var_2134, interleave = attn_output_39_interleave_0, values = (var_2120_cast_fp16, attn_output_37_cast_fp16))[name = string("attn_output_39_cast_fp16")]; tensor var_2138_perm_0 = const()[name = string("op_2138_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1024, 1, 16])]; tensor var_2138_cast_fp16 = transpose(perm = var_2138_perm_0, x = attn_output_39_cast_fp16)[name = string("transpose_51")]; tensor x_113_cast_fp16 = reshape(shape = var_2143, x = var_2138_cast_fp16)[name = string("x_113_cast_fp16")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386112)))]; tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = var_2150_to_fp16, x = x_113_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor x_115_cast_fp16 = add(x = x_107_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("x_115_cast_fp16")]; int32 var_2162 = const()[name = string("op_2162"), val = int32(1)]; fp16 const_73_promoted_to_fp16 = const()[name = string("const_73_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2165_cast_fp16 = mul(x = x_115_cast_fp16, y = const_73_promoted_to_fp16)[name = string("op_2165_cast_fp16")]; bool x_117_interleave_0 = const()[name = string("x_117_interleave_0"), val = bool(false)]; tensor x_117_cast_fp16 = concat(axis = var_2162, interleave = x_117_interleave_0, values = (x_115_cast_fp16, var_2165_cast_fp16))[name = string("x_117_cast_fp16")]; tensor out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor([1])]; fp16 var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2175_to_fp16, x = x_117_cast_fp16)[name = string("out_79_cast_fp16")]; tensor layer_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192483328)))]; tensor out_81_cast_fp16 = mul(x = out_79_cast_fp16, y = layer_layers_6_post_attention_layernorm_weight_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_2181_split_sizes_0 = const()[name = string("op_2181_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2181_axis_0 = const()[name = string("op_2181_axis_0"), val = int32(1)]; tensor var_2181_cast_fp16_0, tensor var_2181_cast_fp16_1 = split(axis = var_2181_axis_0, split_sizes = var_2181_split_sizes_0, x = out_81_cast_fp16)[name = string("op_2181_cast_fp16")]; string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192487488)))]; tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = var_2186_to_fp16, x = var_2181_cast_fp16_0)[name = string("input_13_cast_fp16")]; tensor var_2197_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_2197_cast_fp16")]; string var_2202_pad_type_0 = const()[name = string("op_2202_pad_type_0"), val = string("valid")]; tensor var_2202_strides_0 = const()[name = string("op_2202_strides_0"), val = tensor([1, 1])]; tensor var_2202_pad_0 = const()[name = string("op_2202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2202_dilations_0 = const()[name = string("op_2202_dilations_0"), val = tensor([1, 1])]; int32 var_2202_groups_0 = const()[name = string("op_2202_groups_0"), val = int32(1)]; tensor var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200876160)))]; tensor var_2202_cast_fp16 = conv(dilations = var_2202_dilations_0, groups = var_2202_groups_0, pad = var_2202_pad_0, pad_type = var_2202_pad_type_0, strides = var_2202_strides_0, weight = var_2185_to_fp16, x = var_2181_cast_fp16_0)[name = string("op_2202_cast_fp16")]; tensor x_123_cast_fp16 = mul(x = var_2197_cast_fp16, y = var_2202_cast_fp16)[name = string("x_123_cast_fp16")]; string hidden_states_41_pad_type_0 = const()[name = string("hidden_states_41_pad_type_0"), val = string("valid")]; tensor hidden_states_41_strides_0 = const()[name = string("hidden_states_41_strides_0"), val = tensor([1, 1])]; tensor hidden_states_41_pad_0 = const()[name = string("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_41_dilations_0 = const()[name = string("hidden_states_41_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_41_groups_0 = const()[name = string("hidden_states_41_groups_0"), val = int32(1)]; tensor var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209264832)))]; tensor hidden_states_41_cast_fp16 = conv(dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = var_2184_to_fp16, x = x_123_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor x_125_cast_fp16 = add(x = x_115_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("x_125_cast_fp16")]; int32 var_2215 = const()[name = string("op_2215"), val = int32(1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_125_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool x_127_interleave_0 = const()[name = string("x_127_interleave_0"), val = bool(false)]; tensor x_127_cast_fp16 = concat(axis = var_2215, interleave = x_127_interleave_0, values = (x_125_cast_fp16, var_2218_cast_fp16))[name = string("x_127_cast_fp16")]; tensor out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor([1])]; fp16 var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2228_to_fp16, x = x_127_cast_fp16)[name = string("out_85_cast_fp16")]; tensor layer_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217653504)))]; tensor out_87_cast_fp16 = mul(x = out_85_cast_fp16, y = layer_layers_7_input_layernorm_weight_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_2234_split_sizes_0 = const()[name = string("op_2234_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2234_axis_0 = const()[name = string("op_2234_axis_0"), val = int32(1)]; tensor var_2234_cast_fp16_0, tensor var_2234_cast_fp16_1 = split(axis = var_2234_axis_0, split_sizes = var_2234_split_sizes_0, x = out_87_cast_fp16)[name = string("op_2234_cast_fp16")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217657664)))]; tensor query_states_29_cast_fp16 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = var_2256_to_fp16, x = var_2234_cast_fp16_0)[name = string("query_states_29_cast_fp16")]; string key_states_29_pad_type_0 = const()[name = string("key_states_29_pad_type_0"), val = string("valid")]; tensor key_states_29_strides_0 = const()[name = string("key_states_29_strides_0"), val = tensor([1, 1])]; tensor key_states_29_pad_0 = const()[name = string("key_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_29_dilations_0 = const()[name = string("key_states_29_dilations_0"), val = tensor([1, 1])]; int32 key_states_29_groups_0 = const()[name = string("key_states_29_groups_0"), val = int32(1)]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754880)))]; tensor key_states_29_cast_fp16 = conv(dilations = key_states_29_dilations_0, groups = key_states_29_groups_0, pad = key_states_29_pad_0, pad_type = key_states_29_pad_type_0, strides = key_states_29_strides_0, weight = var_2267_to_fp16, x = var_2234_cast_fp16_0)[name = string("key_states_29_cast_fp16")]; string value_states_29_pad_type_0 = const()[name = string("value_states_29_pad_type_0"), val = string("valid")]; tensor value_states_29_strides_0 = const()[name = string("value_states_29_strides_0"), val = tensor([1, 1])]; tensor value_states_29_pad_0 = const()[name = string("value_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_29_dilations_0 = const()[name = string("value_states_29_dilations_0"), val = tensor([1, 1])]; int32 value_states_29_groups_0 = const()[name = string("value_states_29_groups_0"), val = int32(1)]; tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220017088)))]; tensor value_states_29_cast_fp16 = conv(dilations = value_states_29_dilations_0, groups = value_states_29_groups_0, pad = value_states_29_pad_0, pad_type = value_states_29_pad_type_0, strides = value_states_29_strides_0, weight = var_2278_to_fp16, x = var_2234_cast_fp16_0)[name = string("value_states_29_cast_fp16")]; tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 16, 64, 16])]; tensor embed_29_cast_fp16 = reshape(shape = var_2286, x = query_states_29_cast_fp16)[name = string("embed_29_cast_fp16")]; tensor var_2290 = const()[name = string("op_2290"), val = tensor([1, 2, 64, 16])]; tensor var_2291_cast_fp16 = reshape(shape = var_2290, x = key_states_29_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor embed_31_perm_0 = const()[name = string("embed_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([1, 2, 64, 16])]; tensor var_2298_cast_fp16 = reshape(shape = var_2297, x = value_states_29_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor value_states_31_perm_0 = const()[name = string("value_states_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_cast_fp16 = mul(x = embed_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2302_cast_fp16")]; tensor var_2303_split_sizes_0 = const()[name = string("op_2303_split_sizes_0"), val = tensor([32, 32])]; int32 var_2303_axis_0 = const()[name = string("op_2303_axis_0"), val = int32(-2)]; tensor var_2303_cast_fp16_0, tensor var_2303_cast_fp16_1 = split(axis = var_2303_axis_0, split_sizes = var_2303_split_sizes_0, x = embed_29_cast_fp16)[name = string("op_2303_cast_fp16")]; fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2305_cast_fp16 = mul(x = var_2303_cast_fp16_1, y = const_77_promoted_to_fp16)[name = string("op_2305_cast_fp16")]; int32 var_2307 = const()[name = string("op_2307"), val = int32(-2)]; bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; tensor var_2308_cast_fp16 = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (var_2305_cast_fp16, var_2303_cast_fp16_0))[name = string("op_2308_cast_fp16")]; tensor var_2309_cast_fp16 = mul(x = var_2308_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_2302_cast_fp16, y = var_2309_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor embed_31_cast_fp16 = transpose(perm = embed_31_perm_0, x = var_2291_cast_fp16)[name = string("transpose_50")]; tensor var_2312_cast_fp16 = mul(x = embed_31_cast_fp16, y = cos_cast_fp16)[name = string("op_2312_cast_fp16")]; tensor var_2313_split_sizes_0 = const()[name = string("op_2313_split_sizes_0"), val = tensor([32, 32])]; int32 var_2313_axis_0 = const()[name = string("op_2313_axis_0"), val = int32(-1)]; tensor var_2313_cast_fp16_0, tensor var_2313_cast_fp16_1 = split(axis = var_2313_axis_0, split_sizes = var_2313_split_sizes_0, x = embed_31_cast_fp16)[name = string("op_2313_cast_fp16")]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2315_cast_fp16 = mul(x = var_2313_cast_fp16_1, y = const_78_promoted_to_fp16)[name = string("op_2315_cast_fp16")]; int32 var_2317 = const()[name = string("op_2317"), val = int32(-1)]; bool var_2318_interleave_0 = const()[name = string("op_2318_interleave_0"), val = bool(false)]; tensor var_2318_cast_fp16 = concat(axis = var_2317, interleave = var_2318_interleave_0, values = (var_2315_cast_fp16, var_2313_cast_fp16_0))[name = string("op_2318_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = var_2318_cast_fp16, y = sin_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor key_states_31_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2319_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor expand_dims_71 = const()[name = string("expand_dims_71"), val = tensor([7])]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_74 = const()[name = string("expand_dims_74"), val = tensor([8])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_71, expand_dims_72, position_id, concat_59_values3_0))[name = string("concat_59")]; tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_74, concat_60_values1_0, var_426, concat_60_values3_0))[name = string("concat_60")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = key_states_31_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_276_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_276")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_31_cast_fp16 = transpose(perm = value_states_31_perm_0, x = var_2298_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = value_states_31_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_277_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_277")]; tensor var_2362_begin_0 = const()[name = string("op_2362_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2362_end_0 = const()[name = string("op_2362_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2362_end_mask_0 = const()[name = string("op_2362_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = coreml_update_state_62)[name = string("op_2362_cast_fp16")]; tensor tile_14 = const()[name = string("tile_14"), val = tensor([1, 1])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = tile_14, x = var_2362_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2372_begin_0 = const()[name = string("op_2372_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2372_end_0 = const()[name = string("op_2372_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2372_end_mask_0 = const()[name = string("op_2372_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2372_cast_fp16 = slice_by_index(begin = var_2372_begin_0, end = var_2372_end_0, end_mask = var_2372_end_mask_0, x = coreml_update_state_63)[name = string("op_2372_cast_fp16")]; tensor tile_15 = const()[name = string("tile_15"), val = tensor([1, 1])]; int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(1)]; tensor var_2375_cast_fp16_0, tensor var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = tile_15, x = var_2372_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2378_split_sizes_0 = const()[name = string("op_2378_split_sizes_0"), val = tensor([8, 8])]; int32 var_2378_axis_0 = const()[name = string("op_2378_axis_0"), val = int32(1)]; tensor var_2378_cast_fp16_0, tensor var_2378_cast_fp16_1 = split(axis = var_2378_axis_0, split_sizes = var_2378_split_sizes_0, x = query_states_31_cast_fp16)[name = string("op_2378_cast_fp16")]; bool attn_weights_113_transpose_x_0 = const()[name = string("attn_weights_113_transpose_x_0"), val = bool(false)]; bool attn_weights_113_transpose_y_0 = const()[name = string("attn_weights_113_transpose_y_0"), val = bool(false)]; tensor attn_weights_113_cast_fp16 = matmul(transpose_x = attn_weights_113_transpose_x_0, transpose_y = attn_weights_113_transpose_y_0, x = var_2365_cast_fp16_0, y = var_2378_cast_fp16_0)[name = string("attn_weights_113_cast_fp16")]; fp16 _inversed_attn_weights_115_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_115_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_115_cast_fp16 = mul(x = attn_weights_113_cast_fp16, y = _inversed_attn_weights_115_y_0_to_fp16)[name = string("_inversed_attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = _inversed_attn_weights_115_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; int32 var_2385 = const()[name = string("op_2385"), val = int32(2)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_2385, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool var_2391_transpose_x_1 = const()[name = string("op_2391_transpose_x_1"), val = bool(true)]; bool var_2391_transpose_y_1 = const()[name = string("op_2391_transpose_y_1"), val = bool(false)]; tensor var_2391_cast_fp16 = matmul(transpose_x = var_2391_transpose_x_1, transpose_y = var_2391_transpose_y_1, x = attn_weights_119_cast_fp16, y = var_2375_cast_fp16_0)[name = string("op_2391_cast_fp16")]; bool attn_weights_121_transpose_x_0 = const()[name = string("attn_weights_121_transpose_x_0"), val = bool(false)]; bool attn_weights_121_transpose_y_0 = const()[name = string("attn_weights_121_transpose_y_0"), val = bool(false)]; tensor attn_weights_121_cast_fp16 = matmul(transpose_x = attn_weights_121_transpose_x_0, transpose_y = attn_weights_121_transpose_y_0, x = var_2365_cast_fp16_1, y = var_2378_cast_fp16_1)[name = string("attn_weights_121_cast_fp16")]; fp16 _inversed_attn_weights_123_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_123_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_123_cast_fp16 = mul(x = attn_weights_121_cast_fp16, y = _inversed_attn_weights_123_y_0_to_fp16)[name = string("_inversed_attn_weights_123_cast_fp16")]; tensor attn_weights_125_cast_fp16 = add(x = _inversed_attn_weights_123_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(2)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_2397, x = attn_weights_125_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_43_transpose_x_1 = const()[name = string("attn_output_43_transpose_x_1"), val = bool(true)]; bool attn_output_43_transpose_y_1 = const()[name = string("attn_output_43_transpose_y_1"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_1, transpose_y = attn_output_43_transpose_y_1, x = attn_weights_127_cast_fp16, y = var_2375_cast_fp16_1)[name = string("attn_output_43_cast_fp16")]; int32 var_2405 = const()[name = string("op_2405"), val = int32(1)]; bool attn_output_45_interleave_0 = const()[name = string("attn_output_45_interleave_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = concat(axis = var_2405, interleave = attn_output_45_interleave_0, values = (var_2391_cast_fp16, attn_output_43_cast_fp16))[name = string("attn_output_45_cast_fp16")]; tensor var_2409_perm_0 = const()[name = string("op_2409_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2414 = const()[name = string("op_2414"), val = tensor([1, 1024, 1, 16])]; tensor var_2409_cast_fp16 = transpose(perm = var_2409_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor x_131_cast_fp16 = reshape(shape = var_2414, x = var_2409_cast_fp16)[name = string("x_131_cast_fp16")]; string hidden_states_45_pad_type_0 = const()[name = string("hidden_states_45_pad_type_0"), val = string("valid")]; tensor hidden_states_45_strides_0 = const()[name = string("hidden_states_45_strides_0"), val = tensor([1, 1])]; tensor hidden_states_45_pad_0 = const()[name = string("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_45_dilations_0 = const()[name = string("hidden_states_45_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_45_groups_0 = const()[name = string("hidden_states_45_groups_0"), val = int32(1)]; tensor var_2421_to_fp16 = const()[name = string("op_2421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279296)))]; tensor hidden_states_45_cast_fp16 = conv(dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = var_2421_to_fp16, x = x_131_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor x_133_cast_fp16 = add(x = x_125_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("x_133_cast_fp16")]; int32 var_2433 = const()[name = string("op_2433"), val = int32(1)]; fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2436_cast_fp16 = mul(x = x_133_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; bool x_135_interleave_0 = const()[name = string("x_135_interleave_0"), val = bool(false)]; tensor x_135_cast_fp16 = concat(axis = var_2433, interleave = x_135_interleave_0, values = (x_133_cast_fp16, var_2436_cast_fp16))[name = string("x_135_cast_fp16")]; tensor out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor([1])]; fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2446_to_fp16, x = x_135_cast_fp16)[name = string("out_91_cast_fp16")]; tensor layer_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376512)))]; tensor out_93_cast_fp16 = mul(x = out_91_cast_fp16, y = layer_layers_7_post_attention_layernorm_weight_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_2452_split_sizes_0 = const()[name = string("op_2452_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2452_axis_0 = const()[name = string("op_2452_axis_0"), val = int32(1)]; tensor var_2452_cast_fp16_0, tensor var_2452_cast_fp16_1 = split(axis = var_2452_axis_0, split_sizes = var_2452_split_sizes_0, x = out_93_cast_fp16)[name = string("op_2452_cast_fp16")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222380672)))]; tensor input_15_cast_fp16 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = var_2457_to_fp16, x = var_2452_cast_fp16_0)[name = string("input_15_cast_fp16")]; tensor var_2468_cast_fp16 = silu(x = input_15_cast_fp16)[name = string("op_2468_cast_fp16")]; string var_2473_pad_type_0 = const()[name = string("op_2473_pad_type_0"), val = string("valid")]; tensor var_2473_strides_0 = const()[name = string("op_2473_strides_0"), val = tensor([1, 1])]; tensor var_2473_pad_0 = const()[name = string("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2473_dilations_0 = const()[name = string("op_2473_dilations_0"), val = tensor([1, 1])]; int32 var_2473_groups_0 = const()[name = string("op_2473_groups_0"), val = int32(1)]; tensor var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230769344)))]; tensor var_2473_cast_fp16 = conv(dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = var_2456_to_fp16, x = var_2452_cast_fp16_0)[name = string("op_2473_cast_fp16")]; tensor x_141_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2473_cast_fp16)[name = string("x_141_cast_fp16")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239158016)))]; tensor hidden_states_47_cast_fp16 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = var_2455_to_fp16, x = x_141_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor x_143_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("x_143_cast_fp16")]; int32 var_2486 = const()[name = string("op_2486"), val = int32(1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2489_cast_fp16 = mul(x = x_143_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; bool x_145_interleave_0 = const()[name = string("x_145_interleave_0"), val = bool(false)]; tensor x_145_cast_fp16 = concat(axis = var_2486, interleave = x_145_interleave_0, values = (x_143_cast_fp16, var_2489_cast_fp16))[name = string("x_145_cast_fp16")]; tensor out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor([1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_2499_to_fp16, x = x_145_cast_fp16)[name = string("out_97_cast_fp16")]; tensor layer_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247546688)))]; tensor out_99_cast_fp16 = mul(x = out_97_cast_fp16, y = layer_layers_8_input_layernorm_weight_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_2505_split_sizes_0 = const()[name = string("op_2505_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2505_axis_0 = const()[name = string("op_2505_axis_0"), val = int32(1)]; tensor var_2505_cast_fp16_0, tensor var_2505_cast_fp16_1 = split(axis = var_2505_axis_0, split_sizes = var_2505_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2505_cast_fp16")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247550848)))]; tensor query_states_33_cast_fp16 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = var_2527_to_fp16, x = var_2505_cast_fp16_0)[name = string("query_states_33_cast_fp16")]; string key_states_33_pad_type_0 = const()[name = string("key_states_33_pad_type_0"), val = string("valid")]; tensor key_states_33_strides_0 = const()[name = string("key_states_33_strides_0"), val = tensor([1, 1])]; tensor key_states_33_pad_0 = const()[name = string("key_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_33_dilations_0 = const()[name = string("key_states_33_dilations_0"), val = tensor([1, 1])]; int32 key_states_33_groups_0 = const()[name = string("key_states_33_groups_0"), val = int32(1)]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249648064)))]; tensor key_states_33_cast_fp16 = conv(dilations = key_states_33_dilations_0, groups = key_states_33_groups_0, pad = key_states_33_pad_0, pad_type = key_states_33_pad_type_0, strides = key_states_33_strides_0, weight = var_2538_to_fp16, x = var_2505_cast_fp16_0)[name = string("key_states_33_cast_fp16")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249910272)))]; tensor value_states_33_cast_fp16 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = var_2549_to_fp16, x = var_2505_cast_fp16_0)[name = string("value_states_33_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 16, 64, 16])]; tensor embed_33_cast_fp16 = reshape(shape = var_2557, x = query_states_33_cast_fp16)[name = string("embed_33_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 2, 64, 16])]; tensor var_2562_cast_fp16 = reshape(shape = var_2561, x = key_states_33_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor embed_35_perm_0 = const()[name = string("embed_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([1, 2, 64, 16])]; tensor var_2569_cast_fp16 = reshape(shape = var_2568, x = value_states_33_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor value_states_35_perm_0 = const()[name = string("value_states_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2573_cast_fp16 = mul(x = embed_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor var_2574_split_sizes_0 = const()[name = string("op_2574_split_sizes_0"), val = tensor([32, 32])]; int32 var_2574_axis_0 = const()[name = string("op_2574_axis_0"), val = int32(-2)]; tensor var_2574_cast_fp16_0, tensor var_2574_cast_fp16_1 = split(axis = var_2574_axis_0, split_sizes = var_2574_split_sizes_0, x = embed_33_cast_fp16)[name = string("op_2574_cast_fp16")]; fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2576_cast_fp16 = mul(x = var_2574_cast_fp16_1, y = const_87_promoted_to_fp16)[name = string("op_2576_cast_fp16")]; int32 var_2578 = const()[name = string("op_2578"), val = int32(-2)]; bool var_2579_interleave_0 = const()[name = string("op_2579_interleave_0"), val = bool(false)]; tensor var_2579_cast_fp16 = concat(axis = var_2578, interleave = var_2579_interleave_0, values = (var_2576_cast_fp16, var_2574_cast_fp16_0))[name = string("op_2579_cast_fp16")]; tensor var_2580_cast_fp16 = mul(x = var_2579_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2580_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor embed_35_cast_fp16 = transpose(perm = embed_35_perm_0, x = var_2562_cast_fp16)[name = string("transpose_47")]; tensor var_2583_cast_fp16 = mul(x = embed_35_cast_fp16, y = cos_cast_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2584_split_sizes_0 = const()[name = string("op_2584_split_sizes_0"), val = tensor([32, 32])]; int32 var_2584_axis_0 = const()[name = string("op_2584_axis_0"), val = int32(-1)]; tensor var_2584_cast_fp16_0, tensor var_2584_cast_fp16_1 = split(axis = var_2584_axis_0, split_sizes = var_2584_split_sizes_0, x = embed_35_cast_fp16)[name = string("op_2584_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2586_cast_fp16 = mul(x = var_2584_cast_fp16_1, y = const_88_promoted_to_fp16)[name = string("op_2586_cast_fp16")]; int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; tensor var_2589_cast_fp16 = concat(axis = var_2588, interleave = var_2589_interleave_0, values = (var_2586_cast_fp16, var_2584_cast_fp16_0))[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_cast_fp16)[name = string("op_2590_cast_fp16")]; tensor key_states_35_cast_fp16 = add(x = var_2583_cast_fp16, y = var_2590_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([8])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([9])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_81, expand_dims_82, position_id, concat_67_values3_0))[name = string("concat_67")]; tensor concat_68_values1_0 = const()[name = string("concat_68_values1_0"), val = tensor([0])]; tensor concat_68_values3_0 = const()[name = string("concat_68_values3_0"), val = tensor([0])]; int32 concat_68_axis_0 = const()[name = string("concat_68_axis_0"), val = int32(0)]; bool concat_68_interleave_0 = const()[name = string("concat_68_interleave_0"), val = bool(false)]; tensor concat_68 = concat(axis = concat_68_axis_0, interleave = concat_68_interleave_0, values = (expand_dims_84, concat_68_values1_0, var_426, concat_68_values3_0))[name = string("concat_68")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = key_states_35_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_278_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_278")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35_cast_fp16 = transpose(perm = value_states_35_perm_0, x = var_2569_cast_fp16)[name = string("transpose_46")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = value_states_35_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_279_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_279")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_64)[name = string("op_2633_cast_fp16")]; tensor tile_16 = const()[name = string("tile_16"), val = tensor([1, 1])]; int32 var_2636_axis_0 = const()[name = string("op_2636_axis_0"), val = int32(1)]; tensor var_2636_cast_fp16_0, tensor var_2636_cast_fp16_1 = split(axis = var_2636_axis_0, split_sizes = tile_16, x = var_2633_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor var_2643_begin_0 = const()[name = string("op_2643_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2643_end_0 = const()[name = string("op_2643_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2643_end_mask_0 = const()[name = string("op_2643_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = coreml_update_state_65)[name = string("op_2643_cast_fp16")]; tensor tile_17 = const()[name = string("tile_17"), val = tensor([1, 1])]; int32 var_2646_axis_0 = const()[name = string("op_2646_axis_0"), val = int32(1)]; tensor var_2646_cast_fp16_0, tensor var_2646_cast_fp16_1 = split(axis = var_2646_axis_0, split_sizes = tile_17, x = var_2643_cast_fp16)[name = string("op_2646_cast_fp16")]; tensor var_2649_split_sizes_0 = const()[name = string("op_2649_split_sizes_0"), val = tensor([8, 8])]; int32 var_2649_axis_0 = const()[name = string("op_2649_axis_0"), val = int32(1)]; tensor var_2649_cast_fp16_0, tensor var_2649_cast_fp16_1 = split(axis = var_2649_axis_0, split_sizes = var_2649_split_sizes_0, x = query_states_35_cast_fp16)[name = string("op_2649_cast_fp16")]; bool attn_weights_129_transpose_x_0 = const()[name = string("attn_weights_129_transpose_x_0"), val = bool(false)]; bool attn_weights_129_transpose_y_0 = const()[name = string("attn_weights_129_transpose_y_0"), val = bool(false)]; tensor attn_weights_129_cast_fp16 = matmul(transpose_x = attn_weights_129_transpose_x_0, transpose_y = attn_weights_129_transpose_y_0, x = var_2636_cast_fp16_0, y = var_2649_cast_fp16_0)[name = string("attn_weights_129_cast_fp16")]; fp16 _inversed_attn_weights_131_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_131_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_131_cast_fp16 = mul(x = attn_weights_129_cast_fp16, y = _inversed_attn_weights_131_y_0_to_fp16)[name = string("_inversed_attn_weights_131_cast_fp16")]; tensor attn_weights_133_cast_fp16 = add(x = _inversed_attn_weights_131_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_133_cast_fp16")]; int32 var_2656 = const()[name = string("op_2656"), val = int32(2)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_2656, x = attn_weights_133_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool var_2662_transpose_x_1 = const()[name = string("op_2662_transpose_x_1"), val = bool(true)]; bool var_2662_transpose_y_1 = const()[name = string("op_2662_transpose_y_1"), val = bool(false)]; tensor var_2662_cast_fp16 = matmul(transpose_x = var_2662_transpose_x_1, transpose_y = var_2662_transpose_y_1, x = attn_weights_135_cast_fp16, y = var_2646_cast_fp16_0)[name = string("op_2662_cast_fp16")]; bool attn_weights_137_transpose_x_0 = const()[name = string("attn_weights_137_transpose_x_0"), val = bool(false)]; bool attn_weights_137_transpose_y_0 = const()[name = string("attn_weights_137_transpose_y_0"), val = bool(false)]; tensor attn_weights_137_cast_fp16 = matmul(transpose_x = attn_weights_137_transpose_x_0, transpose_y = attn_weights_137_transpose_y_0, x = var_2636_cast_fp16_1, y = var_2649_cast_fp16_1)[name = string("attn_weights_137_cast_fp16")]; fp16 _inversed_attn_weights_139_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_139_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_139_cast_fp16 = mul(x = attn_weights_137_cast_fp16, y = _inversed_attn_weights_139_y_0_to_fp16)[name = string("_inversed_attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = _inversed_attn_weights_139_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; int32 var_2668 = const()[name = string("op_2668"), val = int32(2)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_2668, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(true)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_143_cast_fp16, y = var_2646_cast_fp16_1)[name = string("attn_output_49_cast_fp16")]; int32 var_2676 = const()[name = string("op_2676"), val = int32(1)]; bool attn_output_51_interleave_0 = const()[name = string("attn_output_51_interleave_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = concat(axis = var_2676, interleave = attn_output_51_interleave_0, values = (var_2662_cast_fp16, attn_output_49_cast_fp16))[name = string("attn_output_51_cast_fp16")]; tensor var_2680_perm_0 = const()[name = string("op_2680_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 1024, 1, 16])]; tensor var_2680_cast_fp16 = transpose(perm = var_2680_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_45")]; tensor x_149_cast_fp16 = reshape(shape = var_2685, x = var_2680_cast_fp16)[name = string("x_149_cast_fp16")]; string hidden_states_51_pad_type_0 = const()[name = string("hidden_states_51_pad_type_0"), val = string("valid")]; tensor hidden_states_51_strides_0 = const()[name = string("hidden_states_51_strides_0"), val = tensor([1, 1])]; tensor hidden_states_51_pad_0 = const()[name = string("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_51_dilations_0 = const()[name = string("hidden_states_51_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_51_groups_0 = const()[name = string("hidden_states_51_groups_0"), val = int32(1)]; tensor var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250172480)))]; tensor hidden_states_51_cast_fp16 = conv(dilations = hidden_states_51_dilations_0, groups = hidden_states_51_groups_0, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = hidden_states_51_strides_0, weight = var_2692_to_fp16, x = x_149_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor x_151_cast_fp16 = add(x = x_143_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("x_151_cast_fp16")]; int32 var_2704 = const()[name = string("op_2704"), val = int32(1)]; fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2707_cast_fp16 = mul(x = x_151_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_2707_cast_fp16")]; bool x_153_interleave_0 = const()[name = string("x_153_interleave_0"), val = bool(false)]; tensor x_153_cast_fp16 = concat(axis = var_2704, interleave = x_153_interleave_0, values = (x_151_cast_fp16, var_2707_cast_fp16))[name = string("x_153_cast_fp16")]; tensor out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor([1])]; fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_2717_to_fp16, x = x_153_cast_fp16)[name = string("out_103_cast_fp16")]; tensor layer_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252269696)))]; tensor out_105_cast_fp16 = mul(x = out_103_cast_fp16, y = layer_layers_8_post_attention_layernorm_weight_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2723_split_sizes_0 = const()[name = string("op_2723_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2723_axis_0 = const()[name = string("op_2723_axis_0"), val = int32(1)]; tensor var_2723_cast_fp16_0, tensor var_2723_cast_fp16_1 = split(axis = var_2723_axis_0, split_sizes = var_2723_split_sizes_0, x = out_105_cast_fp16)[name = string("op_2723_cast_fp16")]; string input_17_pad_type_0 = const()[name = string("input_17_pad_type_0"), val = string("valid")]; tensor input_17_strides_0 = const()[name = string("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = string("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = string("input_17_dilations_0"), val = tensor([1, 1])]; int32 input_17_groups_0 = const()[name = string("input_17_groups_0"), val = int32(1)]; tensor var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252273856)))]; tensor input_17_cast_fp16 = conv(dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = var_2728_to_fp16, x = var_2723_cast_fp16_0)[name = string("input_17_cast_fp16")]; tensor var_2739_cast_fp16 = silu(x = input_17_cast_fp16)[name = string("op_2739_cast_fp16")]; string var_2744_pad_type_0 = const()[name = string("op_2744_pad_type_0"), val = string("valid")]; tensor var_2744_strides_0 = const()[name = string("op_2744_strides_0"), val = tensor([1, 1])]; tensor var_2744_pad_0 = const()[name = string("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2744_dilations_0 = const()[name = string("op_2744_dilations_0"), val = tensor([1, 1])]; int32 var_2744_groups_0 = const()[name = string("op_2744_groups_0"), val = int32(1)]; tensor var_2727_to_fp16 = const()[name = string("op_2727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260662528)))]; tensor var_2744_cast_fp16 = conv(dilations = var_2744_dilations_0, groups = var_2744_groups_0, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2744_strides_0, weight = var_2727_to_fp16, x = var_2723_cast_fp16_0)[name = string("op_2744_cast_fp16")]; tensor x_159_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2744_cast_fp16)[name = string("x_159_cast_fp16")]; string hidden_states_53_pad_type_0 = const()[name = string("hidden_states_53_pad_type_0"), val = string("valid")]; tensor hidden_states_53_strides_0 = const()[name = string("hidden_states_53_strides_0"), val = tensor([1, 1])]; tensor hidden_states_53_pad_0 = const()[name = string("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_53_dilations_0 = const()[name = string("hidden_states_53_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_53_groups_0 = const()[name = string("hidden_states_53_groups_0"), val = int32(1)]; tensor var_2726_to_fp16 = const()[name = string("op_2726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269051200)))]; tensor hidden_states_53_cast_fp16 = conv(dilations = hidden_states_53_dilations_0, groups = hidden_states_53_groups_0, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = hidden_states_53_strides_0, weight = var_2726_to_fp16, x = x_159_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_151_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_2757 = const()[name = string("op_2757"), val = int32(1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2760_cast_fp16 = mul(x = x_161_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_2760_cast_fp16")]; bool x_163_interleave_0 = const()[name = string("x_163_interleave_0"), val = bool(false)]; tensor x_163_cast_fp16 = concat(axis = var_2757, interleave = x_163_interleave_0, values = (x_161_cast_fp16, var_2760_cast_fp16))[name = string("x_163_cast_fp16")]; tensor out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor([1])]; fp16 var_2770_to_fp16 = const()[name = string("op_2770_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_2770_to_fp16, x = x_163_cast_fp16)[name = string("out_109_cast_fp16")]; tensor layer_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277439872)))]; tensor out_111_cast_fp16 = mul(x = out_109_cast_fp16, y = layer_layers_9_input_layernorm_weight_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_2776_split_sizes_0 = const()[name = string("op_2776_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2776_axis_0 = const()[name = string("op_2776_axis_0"), val = int32(1)]; tensor var_2776_cast_fp16_0, tensor var_2776_cast_fp16_1 = split(axis = var_2776_axis_0, split_sizes = var_2776_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2776_cast_fp16")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277444032)))]; tensor query_states_37_cast_fp16 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = var_2798_to_fp16, x = var_2776_cast_fp16_0)[name = string("query_states_37_cast_fp16")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279541248)))]; tensor key_states_37_cast_fp16 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = var_2809_to_fp16, x = var_2776_cast_fp16_0)[name = string("key_states_37_cast_fp16")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor var_2820_to_fp16 = const()[name = string("op_2820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279803456)))]; tensor value_states_37_cast_fp16 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = var_2820_to_fp16, x = var_2776_cast_fp16_0)[name = string("value_states_37_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 16, 64, 16])]; tensor embed_37_cast_fp16 = reshape(shape = var_2828, x = query_states_37_cast_fp16)[name = string("embed_37_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 64, 16])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = key_states_37_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor embed_39_perm_0 = const()[name = string("embed_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([1, 2, 64, 16])]; tensor var_2840_cast_fp16 = reshape(shape = var_2839, x = value_states_37_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor value_states_39_perm_0 = const()[name = string("value_states_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2844_cast_fp16 = mul(x = embed_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor var_2845_split_sizes_0 = const()[name = string("op_2845_split_sizes_0"), val = tensor([32, 32])]; int32 var_2845_axis_0 = const()[name = string("op_2845_axis_0"), val = int32(-2)]; tensor var_2845_cast_fp16_0, tensor var_2845_cast_fp16_1 = split(axis = var_2845_axis_0, split_sizes = var_2845_split_sizes_0, x = embed_37_cast_fp16)[name = string("op_2845_cast_fp16")]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = var_2845_cast_fp16_1, y = const_97_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; int32 var_2849 = const()[name = string("op_2849"), val = int32(-2)]; bool var_2850_interleave_0 = const()[name = string("op_2850_interleave_0"), val = bool(false)]; tensor var_2850_cast_fp16 = concat(axis = var_2849, interleave = var_2850_interleave_0, values = (var_2847_cast_fp16, var_2845_cast_fp16_0))[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = mul(x = var_2850_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2851_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor embed_39_cast_fp16 = transpose(perm = embed_39_perm_0, x = var_2833_cast_fp16)[name = string("transpose_44")]; tensor var_2854_cast_fp16 = mul(x = embed_39_cast_fp16, y = cos_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2855_split_sizes_0 = const()[name = string("op_2855_split_sizes_0"), val = tensor([32, 32])]; int32 var_2855_axis_0 = const()[name = string("op_2855_axis_0"), val = int32(-1)]; tensor var_2855_cast_fp16_0, tensor var_2855_cast_fp16_1 = split(axis = var_2855_axis_0, split_sizes = var_2855_split_sizes_0, x = embed_39_cast_fp16)[name = string("op_2855_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2857_cast_fp16 = mul(x = var_2855_cast_fp16_1, y = const_98_promoted_to_fp16)[name = string("op_2857_cast_fp16")]; int32 var_2859 = const()[name = string("op_2859"), val = int32(-1)]; bool var_2860_interleave_0 = const()[name = string("op_2860_interleave_0"), val = bool(false)]; tensor var_2860_cast_fp16 = concat(axis = var_2859, interleave = var_2860_interleave_0, values = (var_2857_cast_fp16, var_2855_cast_fp16_0))[name = string("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = mul(x = var_2860_cast_fp16, y = sin_cast_fp16)[name = string("op_2861_cast_fp16")]; tensor key_states_39_cast_fp16 = add(x = var_2854_cast_fp16, y = var_2861_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([9])]; tensor expand_dims_92 = const()[name = string("expand_dims_92"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([10])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_91, expand_dims_92, position_id, concat_75_values3_0))[name = string("concat_75")]; tensor concat_76_values1_0 = const()[name = string("concat_76_values1_0"), val = tensor([0])]; tensor concat_76_values3_0 = const()[name = string("concat_76_values3_0"), val = tensor([0])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_94, concat_76_values1_0, var_426, concat_76_values3_0))[name = string("concat_76")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = key_states_39_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_280_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_280")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39_cast_fp16 = transpose(perm = value_states_39_perm_0, x = var_2840_cast_fp16)[name = string("transpose_43")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = value_states_39_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_281_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_281")]; tensor var_2904_begin_0 = const()[name = string("op_2904_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = string("op_2904_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2904_end_mask_0 = const()[name = string("op_2904_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = coreml_update_state_66)[name = string("op_2904_cast_fp16")]; tensor tile_18 = const()[name = string("tile_18"), val = tensor([1, 1])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = tile_18, x = var_2904_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2914_begin_0 = const()[name = string("op_2914_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = string("op_2914_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2914_end_mask_0 = const()[name = string("op_2914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = coreml_update_state_67)[name = string("op_2914_cast_fp16")]; tensor tile_19 = const()[name = string("tile_19"), val = tensor([1, 1])]; int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(1)]; tensor var_2917_cast_fp16_0, tensor var_2917_cast_fp16_1 = split(axis = var_2917_axis_0, split_sizes = tile_19, x = var_2914_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor var_2920_split_sizes_0 = const()[name = string("op_2920_split_sizes_0"), val = tensor([8, 8])]; int32 var_2920_axis_0 = const()[name = string("op_2920_axis_0"), val = int32(1)]; tensor var_2920_cast_fp16_0, tensor var_2920_cast_fp16_1 = split(axis = var_2920_axis_0, split_sizes = var_2920_split_sizes_0, x = query_states_39_cast_fp16)[name = string("op_2920_cast_fp16")]; bool attn_weights_145_transpose_x_0 = const()[name = string("attn_weights_145_transpose_x_0"), val = bool(false)]; bool attn_weights_145_transpose_y_0 = const()[name = string("attn_weights_145_transpose_y_0"), val = bool(false)]; tensor attn_weights_145_cast_fp16 = matmul(transpose_x = attn_weights_145_transpose_x_0, transpose_y = attn_weights_145_transpose_y_0, x = var_2907_cast_fp16_0, y = var_2920_cast_fp16_0)[name = string("attn_weights_145_cast_fp16")]; fp16 _inversed_attn_weights_147_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_147_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_147_cast_fp16 = mul(x = attn_weights_145_cast_fp16, y = _inversed_attn_weights_147_y_0_to_fp16)[name = string("_inversed_attn_weights_147_cast_fp16")]; tensor attn_weights_149_cast_fp16 = add(x = _inversed_attn_weights_147_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; int32 var_2927 = const()[name = string("op_2927"), val = int32(2)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_2927, x = attn_weights_149_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool var_2933_transpose_x_1 = const()[name = string("op_2933_transpose_x_1"), val = bool(true)]; bool var_2933_transpose_y_1 = const()[name = string("op_2933_transpose_y_1"), val = bool(false)]; tensor var_2933_cast_fp16 = matmul(transpose_x = var_2933_transpose_x_1, transpose_y = var_2933_transpose_y_1, x = attn_weights_151_cast_fp16, y = var_2917_cast_fp16_0)[name = string("op_2933_cast_fp16")]; bool attn_weights_153_transpose_x_0 = const()[name = string("attn_weights_153_transpose_x_0"), val = bool(false)]; bool attn_weights_153_transpose_y_0 = const()[name = string("attn_weights_153_transpose_y_0"), val = bool(false)]; tensor attn_weights_153_cast_fp16 = matmul(transpose_x = attn_weights_153_transpose_x_0, transpose_y = attn_weights_153_transpose_y_0, x = var_2907_cast_fp16_1, y = var_2920_cast_fp16_1)[name = string("attn_weights_153_cast_fp16")]; fp16 _inversed_attn_weights_155_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_155_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_155_cast_fp16 = mul(x = attn_weights_153_cast_fp16, y = _inversed_attn_weights_155_y_0_to_fp16)[name = string("_inversed_attn_weights_155_cast_fp16")]; tensor attn_weights_157_cast_fp16 = add(x = _inversed_attn_weights_155_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_157_cast_fp16")]; int32 var_2939 = const()[name = string("op_2939"), val = int32(2)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_2939, x = attn_weights_157_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_55_transpose_x_1 = const()[name = string("attn_output_55_transpose_x_1"), val = bool(true)]; bool attn_output_55_transpose_y_1 = const()[name = string("attn_output_55_transpose_y_1"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_1, transpose_y = attn_output_55_transpose_y_1, x = attn_weights_159_cast_fp16, y = var_2917_cast_fp16_1)[name = string("attn_output_55_cast_fp16")]; int32 var_2947 = const()[name = string("op_2947"), val = int32(1)]; bool attn_output_57_interleave_0 = const()[name = string("attn_output_57_interleave_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = concat(axis = var_2947, interleave = attn_output_57_interleave_0, values = (var_2933_cast_fp16, attn_output_55_cast_fp16))[name = string("attn_output_57_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1024, 1, 16])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; tensor x_167_cast_fp16 = reshape(shape = var_2956, x = var_2951_cast_fp16)[name = string("x_167_cast_fp16")]; string hidden_states_57_pad_type_0 = const()[name = string("hidden_states_57_pad_type_0"), val = string("valid")]; tensor hidden_states_57_strides_0 = const()[name = string("hidden_states_57_strides_0"), val = tensor([1, 1])]; tensor hidden_states_57_pad_0 = const()[name = string("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_57_dilations_0 = const()[name = string("hidden_states_57_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_57_groups_0 = const()[name = string("hidden_states_57_groups_0"), val = int32(1)]; tensor var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280065664)))]; tensor hidden_states_57_cast_fp16 = conv(dilations = hidden_states_57_dilations_0, groups = hidden_states_57_groups_0, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = hidden_states_57_strides_0, weight = var_2963_to_fp16, x = x_167_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor x_169_cast_fp16 = add(x = x_161_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("x_169_cast_fp16")]; int32 var_2975 = const()[name = string("op_2975"), val = int32(1)]; fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2978_cast_fp16 = mul(x = x_169_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; bool x_171_interleave_0 = const()[name = string("x_171_interleave_0"), val = bool(false)]; tensor x_171_cast_fp16 = concat(axis = var_2975, interleave = x_171_interleave_0, values = (x_169_cast_fp16, var_2978_cast_fp16))[name = string("x_171_cast_fp16")]; tensor out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor([1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_2988_to_fp16, x = x_171_cast_fp16)[name = string("out_115_cast_fp16")]; tensor layer_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282162880)))]; tensor out_117_cast_fp16 = mul(x = out_115_cast_fp16, y = layer_layers_9_post_attention_layernorm_weight_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2994_split_sizes_0 = const()[name = string("op_2994_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2994_axis_0 = const()[name = string("op_2994_axis_0"), val = int32(1)]; tensor var_2994_cast_fp16_0, tensor var_2994_cast_fp16_1 = split(axis = var_2994_axis_0, split_sizes = var_2994_split_sizes_0, x = out_117_cast_fp16)[name = string("op_2994_cast_fp16")]; string input_19_pad_type_0 = const()[name = string("input_19_pad_type_0"), val = string("valid")]; tensor input_19_strides_0 = const()[name = string("input_19_strides_0"), val = tensor([1, 1])]; tensor input_19_pad_0 = const()[name = string("input_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_19_dilations_0 = const()[name = string("input_19_dilations_0"), val = tensor([1, 1])]; int32 input_19_groups_0 = const()[name = string("input_19_groups_0"), val = int32(1)]; tensor var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282167040)))]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = var_2999_to_fp16, x = var_2994_cast_fp16_0)[name = string("input_19_cast_fp16")]; tensor var_3010_cast_fp16 = silu(x = input_19_cast_fp16)[name = string("op_3010_cast_fp16")]; string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1, 1])]; tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1, 1])]; int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; tensor var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290555712)))]; tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = var_2998_to_fp16, x = var_2994_cast_fp16_0)[name = string("op_3015_cast_fp16")]; tensor x_177_cast_fp16 = mul(x = var_3010_cast_fp16, y = var_3015_cast_fp16)[name = string("x_177_cast_fp16")]; string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; tensor var_2997_to_fp16 = const()[name = string("op_2997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944384)))]; tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = var_2997_to_fp16, x = x_177_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor x_179_cast_fp16 = add(x = x_169_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("x_179_cast_fp16")]; int32 var_3028 = const()[name = string("op_3028"), val = int32(1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3031_cast_fp16 = mul(x = x_179_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3031_cast_fp16")]; bool x_181_interleave_0 = const()[name = string("x_181_interleave_0"), val = bool(false)]; tensor x_181_cast_fp16 = concat(axis = var_3028, interleave = x_181_interleave_0, values = (x_179_cast_fp16, var_3031_cast_fp16))[name = string("x_181_cast_fp16")]; tensor out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor([1])]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_3041_to_fp16, x = x_181_cast_fp16)[name = string("out_121_cast_fp16")]; tensor layer_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307333056)))]; tensor out_123_cast_fp16 = mul(x = out_121_cast_fp16, y = layer_layers_10_input_layernorm_weight_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_3047_split_sizes_0 = const()[name = string("op_3047_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3047_axis_0 = const()[name = string("op_3047_axis_0"), val = int32(1)]; tensor var_3047_cast_fp16_0, tensor var_3047_cast_fp16_1 = split(axis = var_3047_axis_0, split_sizes = var_3047_split_sizes_0, x = out_123_cast_fp16)[name = string("op_3047_cast_fp16")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307337216)))]; tensor query_states_41_cast_fp16 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = var_3069_to_fp16, x = var_3047_cast_fp16_0)[name = string("query_states_41_cast_fp16")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309434432)))]; tensor key_states_41_cast_fp16 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = var_3080_to_fp16, x = var_3047_cast_fp16_0)[name = string("key_states_41_cast_fp16")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309696640)))]; tensor value_states_41_cast_fp16 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = var_3091_to_fp16, x = var_3047_cast_fp16_0)[name = string("value_states_41_cast_fp16")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([1, 16, 64, 16])]; tensor embed_41_cast_fp16 = reshape(shape = var_3099, x = query_states_41_cast_fp16)[name = string("embed_41_cast_fp16")]; tensor var_3103 = const()[name = string("op_3103"), val = tensor([1, 2, 64, 16])]; tensor var_3104_cast_fp16 = reshape(shape = var_3103, x = key_states_41_cast_fp16)[name = string("op_3104_cast_fp16")]; tensor embed_43_perm_0 = const()[name = string("embed_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 2, 64, 16])]; tensor var_3111_cast_fp16 = reshape(shape = var_3110, x = value_states_41_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor value_states_43_perm_0 = const()[name = string("value_states_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3115_cast_fp16 = mul(x = embed_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3115_cast_fp16")]; tensor var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor([32, 32])]; int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-2)]; tensor var_3116_cast_fp16_0, tensor var_3116_cast_fp16_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = embed_41_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3118_cast_fp16 = mul(x = var_3116_cast_fp16_1, y = const_107_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; int32 var_3120 = const()[name = string("op_3120"), val = int32(-2)]; bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)]; tensor var_3121_cast_fp16 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118_cast_fp16, var_3116_cast_fp16_0))[name = string("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = mul(x = var_3121_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3122_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor embed_43_cast_fp16 = transpose(perm = embed_43_perm_0, x = var_3104_cast_fp16)[name = string("transpose_41")]; tensor var_3125_cast_fp16 = mul(x = embed_43_cast_fp16, y = cos_cast_fp16)[name = string("op_3125_cast_fp16")]; tensor var_3126_split_sizes_0 = const()[name = string("op_3126_split_sizes_0"), val = tensor([32, 32])]; int32 var_3126_axis_0 = const()[name = string("op_3126_axis_0"), val = int32(-1)]; tensor var_3126_cast_fp16_0, tensor var_3126_cast_fp16_1 = split(axis = var_3126_axis_0, split_sizes = var_3126_split_sizes_0, x = embed_43_cast_fp16)[name = string("op_3126_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3128_cast_fp16 = mul(x = var_3126_cast_fp16_1, y = const_108_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; bool var_3131_interleave_0 = const()[name = string("op_3131_interleave_0"), val = bool(false)]; tensor var_3131_cast_fp16 = concat(axis = var_3130, interleave = var_3131_interleave_0, values = (var_3128_cast_fp16, var_3126_cast_fp16_0))[name = string("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = mul(x = var_3131_cast_fp16, y = sin_cast_fp16)[name = string("op_3132_cast_fp16")]; tensor key_states_43_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3132_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor expand_dims_101 = const()[name = string("expand_dims_101"), val = tensor([10])]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([0])]; tensor expand_dims_104 = const()[name = string("expand_dims_104"), val = tensor([11])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_101, expand_dims_102, position_id, concat_83_values3_0))[name = string("concat_83")]; tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (expand_dims_104, concat_84_values1_0, var_426, concat_84_values3_0))[name = string("concat_84")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_282_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_282")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43_cast_fp16 = transpose(perm = value_states_43_perm_0, x = var_3111_cast_fp16)[name = string("transpose_40")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = value_states_43_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_283_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_283")]; tensor var_3175_begin_0 = const()[name = string("op_3175_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3175_end_0 = const()[name = string("op_3175_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3175_end_mask_0 = const()[name = string("op_3175_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3175_cast_fp16 = slice_by_index(begin = var_3175_begin_0, end = var_3175_end_0, end_mask = var_3175_end_mask_0, x = coreml_update_state_68)[name = string("op_3175_cast_fp16")]; tensor tile_20 = const()[name = string("tile_20"), val = tensor([1, 1])]; int32 var_3178_axis_0 = const()[name = string("op_3178_axis_0"), val = int32(1)]; tensor var_3178_cast_fp16_0, tensor var_3178_cast_fp16_1 = split(axis = var_3178_axis_0, split_sizes = tile_20, x = var_3175_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor var_3185_begin_0 = const()[name = string("op_3185_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3185_end_0 = const()[name = string("op_3185_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3185_end_mask_0 = const()[name = string("op_3185_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = coreml_update_state_69)[name = string("op_3185_cast_fp16")]; tensor tile_21 = const()[name = string("tile_21"), val = tensor([1, 1])]; int32 var_3188_axis_0 = const()[name = string("op_3188_axis_0"), val = int32(1)]; tensor var_3188_cast_fp16_0, tensor var_3188_cast_fp16_1 = split(axis = var_3188_axis_0, split_sizes = tile_21, x = var_3185_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor var_3191_split_sizes_0 = const()[name = string("op_3191_split_sizes_0"), val = tensor([8, 8])]; int32 var_3191_axis_0 = const()[name = string("op_3191_axis_0"), val = int32(1)]; tensor var_3191_cast_fp16_0, tensor var_3191_cast_fp16_1 = split(axis = var_3191_axis_0, split_sizes = var_3191_split_sizes_0, x = query_states_43_cast_fp16)[name = string("op_3191_cast_fp16")]; bool attn_weights_161_transpose_x_0 = const()[name = string("attn_weights_161_transpose_x_0"), val = bool(false)]; bool attn_weights_161_transpose_y_0 = const()[name = string("attn_weights_161_transpose_y_0"), val = bool(false)]; tensor attn_weights_161_cast_fp16 = matmul(transpose_x = attn_weights_161_transpose_x_0, transpose_y = attn_weights_161_transpose_y_0, x = var_3178_cast_fp16_0, y = var_3191_cast_fp16_0)[name = string("attn_weights_161_cast_fp16")]; fp16 _inversed_attn_weights_163_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_163_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_163_cast_fp16 = mul(x = attn_weights_161_cast_fp16, y = _inversed_attn_weights_163_y_0_to_fp16)[name = string("_inversed_attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = _inversed_attn_weights_163_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_165_cast_fp16")]; int32 var_3198 = const()[name = string("op_3198"), val = int32(2)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_3198, x = attn_weights_165_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool var_3204_transpose_x_1 = const()[name = string("op_3204_transpose_x_1"), val = bool(true)]; bool var_3204_transpose_y_1 = const()[name = string("op_3204_transpose_y_1"), val = bool(false)]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_1, transpose_y = var_3204_transpose_y_1, x = attn_weights_167_cast_fp16, y = var_3188_cast_fp16_0)[name = string("op_3204_cast_fp16")]; bool attn_weights_169_transpose_x_0 = const()[name = string("attn_weights_169_transpose_x_0"), val = bool(false)]; bool attn_weights_169_transpose_y_0 = const()[name = string("attn_weights_169_transpose_y_0"), val = bool(false)]; tensor attn_weights_169_cast_fp16 = matmul(transpose_x = attn_weights_169_transpose_x_0, transpose_y = attn_weights_169_transpose_y_0, x = var_3178_cast_fp16_1, y = var_3191_cast_fp16_1)[name = string("attn_weights_169_cast_fp16")]; fp16 _inversed_attn_weights_171_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_171_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_171_cast_fp16 = mul(x = attn_weights_169_cast_fp16, y = _inversed_attn_weights_171_y_0_to_fp16)[name = string("_inversed_attn_weights_171_cast_fp16")]; tensor attn_weights_173_cast_fp16 = add(x = _inversed_attn_weights_171_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_173_cast_fp16")]; int32 var_3210 = const()[name = string("op_3210"), val = int32(2)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_3210, x = attn_weights_173_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(true)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_175_cast_fp16, y = var_3188_cast_fp16_1)[name = string("attn_output_61_cast_fp16")]; int32 var_3218 = const()[name = string("op_3218"), val = int32(1)]; bool attn_output_63_interleave_0 = const()[name = string("attn_output_63_interleave_0"), val = bool(false)]; tensor attn_output_63_cast_fp16 = concat(axis = var_3218, interleave = attn_output_63_interleave_0, values = (var_3204_cast_fp16, attn_output_61_cast_fp16))[name = string("attn_output_63_cast_fp16")]; tensor var_3222_perm_0 = const()[name = string("op_3222_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([1, 1024, 1, 16])]; tensor var_3222_cast_fp16 = transpose(perm = var_3222_perm_0, x = attn_output_63_cast_fp16)[name = string("transpose_39")]; tensor x_185_cast_fp16 = reshape(shape = var_3227, x = var_3222_cast_fp16)[name = string("x_185_cast_fp16")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309958848)))]; tensor hidden_states_63_cast_fp16 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = var_3234_to_fp16, x = x_185_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_179_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(1)]; fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3249_cast_fp16 = mul(x = x_187_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_3249_cast_fp16")]; bool x_189_interleave_0 = const()[name = string("x_189_interleave_0"), val = bool(false)]; tensor x_189_cast_fp16 = concat(axis = var_3246, interleave = x_189_interleave_0, values = (x_187_cast_fp16, var_3249_cast_fp16))[name = string("x_189_cast_fp16")]; tensor out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor([1])]; fp16 var_3259_to_fp16 = const()[name = string("op_3259_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_3259_to_fp16, x = x_189_cast_fp16)[name = string("out_127_cast_fp16")]; tensor layer_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312056064)))]; tensor out_129_cast_fp16 = mul(x = out_127_cast_fp16, y = layer_layers_10_post_attention_layernorm_weight_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_3265_split_sizes_0 = const()[name = string("op_3265_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3265_axis_0 = const()[name = string("op_3265_axis_0"), val = int32(1)]; tensor var_3265_cast_fp16_0, tensor var_3265_cast_fp16_1 = split(axis = var_3265_axis_0, split_sizes = var_3265_split_sizes_0, x = out_129_cast_fp16)[name = string("op_3265_cast_fp16")]; string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312060224)))]; tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = var_3270_to_fp16, x = var_3265_cast_fp16_0)[name = string("input_21_cast_fp16")]; tensor var_3281_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_3281_cast_fp16")]; string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")]; tensor var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor([1, 1])]; tensor var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor([1, 1])]; int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)]; tensor var_3269_to_fp16 = const()[name = string("op_3269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320448896)))]; tensor var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = var_3269_to_fp16, x = var_3265_cast_fp16_0)[name = string("op_3286_cast_fp16")]; tensor x_195_cast_fp16 = mul(x = var_3281_cast_fp16, y = var_3286_cast_fp16)[name = string("x_195_cast_fp16")]; string hidden_states_65_pad_type_0 = const()[name = string("hidden_states_65_pad_type_0"), val = string("valid")]; tensor hidden_states_65_strides_0 = const()[name = string("hidden_states_65_strides_0"), val = tensor([1, 1])]; tensor hidden_states_65_pad_0 = const()[name = string("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_65_dilations_0 = const()[name = string("hidden_states_65_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_65_groups_0 = const()[name = string("hidden_states_65_groups_0"), val = int32(1)]; tensor var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328837568)))]; tensor hidden_states_65_cast_fp16 = conv(dilations = hidden_states_65_dilations_0, groups = hidden_states_65_groups_0, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = hidden_states_65_strides_0, weight = var_3268_to_fp16, x = x_195_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor x_197_cast_fp16 = add(x = x_187_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("x_197_cast_fp16")]; int32 var_3299 = const()[name = string("op_3299"), val = int32(1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3302_cast_fp16 = mul(x = x_197_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3302_cast_fp16")]; bool x_199_interleave_0 = const()[name = string("x_199_interleave_0"), val = bool(false)]; tensor x_199_cast_fp16 = concat(axis = var_3299, interleave = x_199_interleave_0, values = (x_197_cast_fp16, var_3302_cast_fp16))[name = string("x_199_cast_fp16")]; tensor out_133_axes_0 = const()[name = string("out_133_axes_0"), val = tensor([1])]; fp16 var_3312_to_fp16 = const()[name = string("op_3312_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_3312_to_fp16, x = x_199_cast_fp16)[name = string("out_133_cast_fp16")]; tensor layer_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337226240)))]; tensor out_135_cast_fp16 = mul(x = out_133_cast_fp16, y = layer_layers_11_input_layernorm_weight_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_3318_split_sizes_0 = const()[name = string("op_3318_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3318_axis_0 = const()[name = string("op_3318_axis_0"), val = int32(1)]; tensor var_3318_cast_fp16_0, tensor var_3318_cast_fp16_1 = split(axis = var_3318_axis_0, split_sizes = var_3318_split_sizes_0, x = out_135_cast_fp16)[name = string("op_3318_cast_fp16")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337230400)))]; tensor query_states_45_cast_fp16 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = var_3340_to_fp16, x = var_3318_cast_fp16_0)[name = string("query_states_45_cast_fp16")]; string key_states_45_pad_type_0 = const()[name = string("key_states_45_pad_type_0"), val = string("valid")]; tensor key_states_45_strides_0 = const()[name = string("key_states_45_strides_0"), val = tensor([1, 1])]; tensor key_states_45_pad_0 = const()[name = string("key_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_45_dilations_0 = const()[name = string("key_states_45_dilations_0"), val = tensor([1, 1])]; int32 key_states_45_groups_0 = const()[name = string("key_states_45_groups_0"), val = int32(1)]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339327616)))]; tensor key_states_45_cast_fp16 = conv(dilations = key_states_45_dilations_0, groups = key_states_45_groups_0, pad = key_states_45_pad_0, pad_type = key_states_45_pad_type_0, strides = key_states_45_strides_0, weight = var_3351_to_fp16, x = var_3318_cast_fp16_0)[name = string("key_states_45_cast_fp16")]; string value_states_45_pad_type_0 = const()[name = string("value_states_45_pad_type_0"), val = string("valid")]; tensor value_states_45_strides_0 = const()[name = string("value_states_45_strides_0"), val = tensor([1, 1])]; tensor value_states_45_pad_0 = const()[name = string("value_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_45_dilations_0 = const()[name = string("value_states_45_dilations_0"), val = tensor([1, 1])]; int32 value_states_45_groups_0 = const()[name = string("value_states_45_groups_0"), val = int32(1)]; tensor var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339589824)))]; tensor value_states_45_cast_fp16 = conv(dilations = value_states_45_dilations_0, groups = value_states_45_groups_0, pad = value_states_45_pad_0, pad_type = value_states_45_pad_type_0, strides = value_states_45_strides_0, weight = var_3362_to_fp16, x = var_3318_cast_fp16_0)[name = string("value_states_45_cast_fp16")]; tensor var_3370 = const()[name = string("op_3370"), val = tensor([1, 16, 64, 16])]; tensor embed_45_cast_fp16 = reshape(shape = var_3370, x = query_states_45_cast_fp16)[name = string("embed_45_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 2, 64, 16])]; tensor var_3375_cast_fp16 = reshape(shape = var_3374, x = key_states_45_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor embed_47_perm_0 = const()[name = string("embed_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 64, 16])]; tensor var_3382_cast_fp16 = reshape(shape = var_3381, x = value_states_45_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor value_states_47_perm_0 = const()[name = string("value_states_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3386_cast_fp16 = mul(x = embed_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3386_cast_fp16")]; tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([32, 32])]; int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-2)]; tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = embed_45_cast_fp16)[name = string("op_3387_cast_fp16")]; fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3389_cast_fp16 = mul(x = var_3387_cast_fp16_1, y = const_117_promoted_to_fp16)[name = string("op_3389_cast_fp16")]; int32 var_3391 = const()[name = string("op_3391"), val = int32(-2)]; bool var_3392_interleave_0 = const()[name = string("op_3392_interleave_0"), val = bool(false)]; tensor var_3392_cast_fp16 = concat(axis = var_3391, interleave = var_3392_interleave_0, values = (var_3389_cast_fp16, var_3387_cast_fp16_0))[name = string("op_3392_cast_fp16")]; tensor var_3393_cast_fp16 = mul(x = var_3392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3393_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_3386_cast_fp16, y = var_3393_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor embed_47_cast_fp16 = transpose(perm = embed_47_perm_0, x = var_3375_cast_fp16)[name = string("transpose_38")]; tensor var_3396_cast_fp16 = mul(x = embed_47_cast_fp16, y = cos_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397_split_sizes_0 = const()[name = string("op_3397_split_sizes_0"), val = tensor([32, 32])]; int32 var_3397_axis_0 = const()[name = string("op_3397_axis_0"), val = int32(-1)]; tensor var_3397_cast_fp16_0, tensor var_3397_cast_fp16_1 = split(axis = var_3397_axis_0, split_sizes = var_3397_split_sizes_0, x = embed_47_cast_fp16)[name = string("op_3397_cast_fp16")]; fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3399_cast_fp16 = mul(x = var_3397_cast_fp16_1, y = const_118_promoted_to_fp16)[name = string("op_3399_cast_fp16")]; int32 var_3401 = const()[name = string("op_3401"), val = int32(-1)]; bool var_3402_interleave_0 = const()[name = string("op_3402_interleave_0"), val = bool(false)]; tensor var_3402_cast_fp16 = concat(axis = var_3401, interleave = var_3402_interleave_0, values = (var_3399_cast_fp16, var_3397_cast_fp16_0))[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = mul(x = var_3402_cast_fp16, y = sin_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor key_states_47_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3403_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([11])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([12])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_111, expand_dims_112, position_id, concat_91_values3_0))[name = string("concat_91")]; tensor concat_92_values1_0 = const()[name = string("concat_92_values1_0"), val = tensor([0])]; tensor concat_92_values3_0 = const()[name = string("concat_92_values3_0"), val = tensor([0])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_114, concat_92_values1_0, var_426, concat_92_values3_0))[name = string("concat_92")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = key_states_47_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_284_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_284")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_47_cast_fp16 = transpose(perm = value_states_47_perm_0, x = var_3382_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = value_states_47_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_285_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_285")]; tensor var_3446_begin_0 = const()[name = string("op_3446_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3446_end_0 = const()[name = string("op_3446_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3446_end_mask_0 = const()[name = string("op_3446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3446_cast_fp16 = slice_by_index(begin = var_3446_begin_0, end = var_3446_end_0, end_mask = var_3446_end_mask_0, x = coreml_update_state_70)[name = string("op_3446_cast_fp16")]; tensor tile_22 = const()[name = string("tile_22"), val = tensor([1, 1])]; int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(1)]; tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = tile_22, x = var_3446_cast_fp16)[name = string("op_3449_cast_fp16")]; tensor var_3456_begin_0 = const()[name = string("op_3456_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3456_end_0 = const()[name = string("op_3456_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3456_end_mask_0 = const()[name = string("op_3456_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = coreml_update_state_71)[name = string("op_3456_cast_fp16")]; tensor tile_23 = const()[name = string("tile_23"), val = tensor([1, 1])]; int32 var_3459_axis_0 = const()[name = string("op_3459_axis_0"), val = int32(1)]; tensor var_3459_cast_fp16_0, tensor var_3459_cast_fp16_1 = split(axis = var_3459_axis_0, split_sizes = tile_23, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3462_split_sizes_0 = const()[name = string("op_3462_split_sizes_0"), val = tensor([8, 8])]; int32 var_3462_axis_0 = const()[name = string("op_3462_axis_0"), val = int32(1)]; tensor var_3462_cast_fp16_0, tensor var_3462_cast_fp16_1 = split(axis = var_3462_axis_0, split_sizes = var_3462_split_sizes_0, x = query_states_47_cast_fp16)[name = string("op_3462_cast_fp16")]; bool attn_weights_177_transpose_x_0 = const()[name = string("attn_weights_177_transpose_x_0"), val = bool(false)]; bool attn_weights_177_transpose_y_0 = const()[name = string("attn_weights_177_transpose_y_0"), val = bool(false)]; tensor attn_weights_177_cast_fp16 = matmul(transpose_x = attn_weights_177_transpose_x_0, transpose_y = attn_weights_177_transpose_y_0, x = var_3449_cast_fp16_0, y = var_3462_cast_fp16_0)[name = string("attn_weights_177_cast_fp16")]; fp16 _inversed_attn_weights_179_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_179_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_179_cast_fp16 = mul(x = attn_weights_177_cast_fp16, y = _inversed_attn_weights_179_y_0_to_fp16)[name = string("_inversed_attn_weights_179_cast_fp16")]; tensor attn_weights_181_cast_fp16 = add(x = _inversed_attn_weights_179_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_181_cast_fp16")]; int32 var_3469 = const()[name = string("op_3469"), val = int32(2)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_3469, x = attn_weights_181_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool var_3475_transpose_x_1 = const()[name = string("op_3475_transpose_x_1"), val = bool(true)]; bool var_3475_transpose_y_1 = const()[name = string("op_3475_transpose_y_1"), val = bool(false)]; tensor var_3475_cast_fp16 = matmul(transpose_x = var_3475_transpose_x_1, transpose_y = var_3475_transpose_y_1, x = attn_weights_183_cast_fp16, y = var_3459_cast_fp16_0)[name = string("op_3475_cast_fp16")]; bool attn_weights_185_transpose_x_0 = const()[name = string("attn_weights_185_transpose_x_0"), val = bool(false)]; bool attn_weights_185_transpose_y_0 = const()[name = string("attn_weights_185_transpose_y_0"), val = bool(false)]; tensor attn_weights_185_cast_fp16 = matmul(transpose_x = attn_weights_185_transpose_x_0, transpose_y = attn_weights_185_transpose_y_0, x = var_3449_cast_fp16_1, y = var_3462_cast_fp16_1)[name = string("attn_weights_185_cast_fp16")]; fp16 _inversed_attn_weights_187_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_187_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_187_cast_fp16 = mul(x = attn_weights_185_cast_fp16, y = _inversed_attn_weights_187_y_0_to_fp16)[name = string("_inversed_attn_weights_187_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = _inversed_attn_weights_187_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_189_cast_fp16")]; int32 var_3481 = const()[name = string("op_3481"), val = int32(2)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_3481, x = attn_weights_189_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_67_transpose_x_1 = const()[name = string("attn_output_67_transpose_x_1"), val = bool(true)]; bool attn_output_67_transpose_y_1 = const()[name = string("attn_output_67_transpose_y_1"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_1, transpose_y = attn_output_67_transpose_y_1, x = attn_weights_191_cast_fp16, y = var_3459_cast_fp16_1)[name = string("attn_output_67_cast_fp16")]; int32 var_3489 = const()[name = string("op_3489"), val = int32(1)]; bool attn_output_69_interleave_0 = const()[name = string("attn_output_69_interleave_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = concat(axis = var_3489, interleave = attn_output_69_interleave_0, values = (var_3475_cast_fp16, attn_output_67_cast_fp16))[name = string("attn_output_69_cast_fp16")]; tensor var_3493_perm_0 = const()[name = string("op_3493_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, 1024, 1, 16])]; tensor var_3493_cast_fp16 = transpose(perm = var_3493_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_36")]; tensor x_203_cast_fp16 = reshape(shape = var_3498, x = var_3493_cast_fp16)[name = string("x_203_cast_fp16")]; string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339852032)))]; tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = var_3505_to_fp16, x = x_203_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor x_205_cast_fp16 = add(x = x_197_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("x_205_cast_fp16")]; int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; fp16 const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3520_cast_fp16 = mul(x = x_205_cast_fp16, y = const_123_promoted_to_fp16)[name = string("op_3520_cast_fp16")]; bool x_207_interleave_0 = const()[name = string("x_207_interleave_0"), val = bool(false)]; tensor x_207_cast_fp16 = concat(axis = var_3517, interleave = x_207_interleave_0, values = (x_205_cast_fp16, var_3520_cast_fp16))[name = string("x_207_cast_fp16")]; tensor out_139_axes_0 = const()[name = string("out_139_axes_0"), val = tensor([1])]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_3530_to_fp16, x = x_207_cast_fp16)[name = string("out_139_cast_fp16")]; tensor layer_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341949248)))]; tensor out_141_cast_fp16 = mul(x = out_139_cast_fp16, y = layer_layers_11_post_attention_layernorm_weight_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_3536_split_sizes_0 = const()[name = string("op_3536_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3536_axis_0 = const()[name = string("op_3536_axis_0"), val = int32(1)]; tensor var_3536_cast_fp16_0, tensor var_3536_cast_fp16_1 = split(axis = var_3536_axis_0, split_sizes = var_3536_split_sizes_0, x = out_141_cast_fp16)[name = string("op_3536_cast_fp16")]; string input_23_pad_type_0 = const()[name = string("input_23_pad_type_0"), val = string("valid")]; tensor input_23_strides_0 = const()[name = string("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = string("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = string("input_23_dilations_0"), val = tensor([1, 1])]; int32 input_23_groups_0 = const()[name = string("input_23_groups_0"), val = int32(1)]; tensor var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341953408)))]; tensor input_23_cast_fp16 = conv(dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = var_3541_to_fp16, x = var_3536_cast_fp16_0)[name = string("input_23_cast_fp16")]; tensor var_3552_cast_fp16 = silu(x = input_23_cast_fp16)[name = string("op_3552_cast_fp16")]; string var_3557_pad_type_0 = const()[name = string("op_3557_pad_type_0"), val = string("valid")]; tensor var_3557_strides_0 = const()[name = string("op_3557_strides_0"), val = tensor([1, 1])]; tensor var_3557_pad_0 = const()[name = string("op_3557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_dilations_0 = const()[name = string("op_3557_dilations_0"), val = tensor([1, 1])]; int32 var_3557_groups_0 = const()[name = string("op_3557_groups_0"), val = int32(1)]; tensor var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350342080)))]; tensor var_3557_cast_fp16 = conv(dilations = var_3557_dilations_0, groups = var_3557_groups_0, pad = var_3557_pad_0, pad_type = var_3557_pad_type_0, strides = var_3557_strides_0, weight = var_3540_to_fp16, x = var_3536_cast_fp16_0)[name = string("op_3557_cast_fp16")]; tensor x_213_cast_fp16 = mul(x = var_3552_cast_fp16, y = var_3557_cast_fp16)[name = string("x_213_cast_fp16")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor var_3539_to_fp16 = const()[name = string("op_3539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358730752)))]; tensor hidden_states_71_cast_fp16 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = var_3539_to_fp16, x = x_213_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor x_215_cast_fp16 = add(x = x_205_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("x_215_cast_fp16")]; int32 var_3570 = const()[name = string("op_3570"), val = int32(1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3573_cast_fp16 = mul(x = x_215_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_3573_cast_fp16")]; bool x_217_interleave_0 = const()[name = string("x_217_interleave_0"), val = bool(false)]; tensor x_217_cast_fp16 = concat(axis = var_3570, interleave = x_217_interleave_0, values = (x_215_cast_fp16, var_3573_cast_fp16))[name = string("x_217_cast_fp16")]; tensor out_145_axes_0 = const()[name = string("out_145_axes_0"), val = tensor([1])]; fp16 var_3583_to_fp16 = const()[name = string("op_3583_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_145_cast_fp16 = layer_norm(axes = out_145_axes_0, epsilon = var_3583_to_fp16, x = x_217_cast_fp16)[name = string("out_145_cast_fp16")]; tensor layer_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367119424)))]; tensor out_147_cast_fp16 = mul(x = out_145_cast_fp16, y = layer_layers_12_input_layernorm_weight_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_3589_split_sizes_0 = const()[name = string("op_3589_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3589_axis_0 = const()[name = string("op_3589_axis_0"), val = int32(1)]; tensor var_3589_cast_fp16_0, tensor var_3589_cast_fp16_1 = split(axis = var_3589_axis_0, split_sizes = var_3589_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3589_cast_fp16")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor var_3611_to_fp16 = const()[name = string("op_3611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367123584)))]; tensor query_states_49_cast_fp16 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = var_3611_to_fp16, x = var_3589_cast_fp16_0)[name = string("query_states_49_cast_fp16")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor var_3622_to_fp16 = const()[name = string("op_3622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369220800)))]; tensor key_states_49_cast_fp16 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = var_3622_to_fp16, x = var_3589_cast_fp16_0)[name = string("key_states_49_cast_fp16")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor var_3633_to_fp16 = const()[name = string("op_3633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369483008)))]; tensor value_states_49_cast_fp16 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = var_3633_to_fp16, x = var_3589_cast_fp16_0)[name = string("value_states_49_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, 16, 64, 16])]; tensor embed_49_cast_fp16 = reshape(shape = var_3641, x = query_states_49_cast_fp16)[name = string("embed_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 2, 64, 16])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = key_states_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor embed_51_perm_0 = const()[name = string("embed_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3652 = const()[name = string("op_3652"), val = tensor([1, 2, 64, 16])]; tensor var_3653_cast_fp16 = reshape(shape = var_3652, x = value_states_49_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor value_states_51_perm_0 = const()[name = string("value_states_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3657_cast_fp16 = mul(x = embed_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3657_cast_fp16")]; tensor var_3658_split_sizes_0 = const()[name = string("op_3658_split_sizes_0"), val = tensor([32, 32])]; int32 var_3658_axis_0 = const()[name = string("op_3658_axis_0"), val = int32(-2)]; tensor var_3658_cast_fp16_0, tensor var_3658_cast_fp16_1 = split(axis = var_3658_axis_0, split_sizes = var_3658_split_sizes_0, x = embed_49_cast_fp16)[name = string("op_3658_cast_fp16")]; fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3660_cast_fp16 = mul(x = var_3658_cast_fp16_1, y = const_127_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; int32 var_3662 = const()[name = string("op_3662"), val = int32(-2)]; bool var_3663_interleave_0 = const()[name = string("op_3663_interleave_0"), val = bool(false)]; tensor var_3663_cast_fp16 = concat(axis = var_3662, interleave = var_3663_interleave_0, values = (var_3660_cast_fp16, var_3658_cast_fp16_0))[name = string("op_3663_cast_fp16")]; tensor var_3664_cast_fp16 = mul(x = var_3663_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_3657_cast_fp16, y = var_3664_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor embed_51_cast_fp16 = transpose(perm = embed_51_perm_0, x = var_3646_cast_fp16)[name = string("transpose_35")]; tensor var_3667_cast_fp16 = mul(x = embed_51_cast_fp16, y = cos_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor var_3668_split_sizes_0 = const()[name = string("op_3668_split_sizes_0"), val = tensor([32, 32])]; int32 var_3668_axis_0 = const()[name = string("op_3668_axis_0"), val = int32(-1)]; tensor var_3668_cast_fp16_0, tensor var_3668_cast_fp16_1 = split(axis = var_3668_axis_0, split_sizes = var_3668_split_sizes_0, x = embed_51_cast_fp16)[name = string("op_3668_cast_fp16")]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3670_cast_fp16 = mul(x = var_3668_cast_fp16_1, y = const_128_promoted_to_fp16)[name = string("op_3670_cast_fp16")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; bool var_3673_interleave_0 = const()[name = string("op_3673_interleave_0"), val = bool(false)]; tensor var_3673_cast_fp16 = concat(axis = var_3672, interleave = var_3673_interleave_0, values = (var_3670_cast_fp16, var_3668_cast_fp16_0))[name = string("op_3673_cast_fp16")]; tensor var_3674_cast_fp16 = mul(x = var_3673_cast_fp16, y = sin_cast_fp16)[name = string("op_3674_cast_fp16")]; tensor key_states_51_cast_fp16 = add(x = var_3667_cast_fp16, y = var_3674_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([12])]; tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([13])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_121, expand_dims_122, position_id, concat_99_values3_0))[name = string("concat_99")]; tensor concat_100_values1_0 = const()[name = string("concat_100_values1_0"), val = tensor([0])]; tensor concat_100_values3_0 = const()[name = string("concat_100_values3_0"), val = tensor([0])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (expand_dims_124, concat_100_values1_0, var_426, concat_100_values3_0))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = key_states_51_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_286_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_286")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51_cast_fp16 = transpose(perm = value_states_51_perm_0, x = var_3653_cast_fp16)[name = string("transpose_34")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = value_states_51_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_287_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_287")]; tensor var_3717_begin_0 = const()[name = string("op_3717_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3717_end_0 = const()[name = string("op_3717_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3717_end_mask_0 = const()[name = string("op_3717_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = coreml_update_state_72)[name = string("op_3717_cast_fp16")]; tensor tile_24 = const()[name = string("tile_24"), val = tensor([1, 1])]; int32 var_3720_axis_0 = const()[name = string("op_3720_axis_0"), val = int32(1)]; tensor var_3720_cast_fp16_0, tensor var_3720_cast_fp16_1 = split(axis = var_3720_axis_0, split_sizes = tile_24, x = var_3717_cast_fp16)[name = string("op_3720_cast_fp16")]; tensor var_3727_begin_0 = const()[name = string("op_3727_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3727_end_0 = const()[name = string("op_3727_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3727_end_mask_0 = const()[name = string("op_3727_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = coreml_update_state_73)[name = string("op_3727_cast_fp16")]; tensor tile_25 = const()[name = string("tile_25"), val = tensor([1, 1])]; int32 var_3730_axis_0 = const()[name = string("op_3730_axis_0"), val = int32(1)]; tensor var_3730_cast_fp16_0, tensor var_3730_cast_fp16_1 = split(axis = var_3730_axis_0, split_sizes = tile_25, x = var_3727_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor var_3733_split_sizes_0 = const()[name = string("op_3733_split_sizes_0"), val = tensor([8, 8])]; int32 var_3733_axis_0 = const()[name = string("op_3733_axis_0"), val = int32(1)]; tensor var_3733_cast_fp16_0, tensor var_3733_cast_fp16_1 = split(axis = var_3733_axis_0, split_sizes = var_3733_split_sizes_0, x = query_states_51_cast_fp16)[name = string("op_3733_cast_fp16")]; bool attn_weights_193_transpose_x_0 = const()[name = string("attn_weights_193_transpose_x_0"), val = bool(false)]; bool attn_weights_193_transpose_y_0 = const()[name = string("attn_weights_193_transpose_y_0"), val = bool(false)]; tensor attn_weights_193_cast_fp16 = matmul(transpose_x = attn_weights_193_transpose_x_0, transpose_y = attn_weights_193_transpose_y_0, x = var_3720_cast_fp16_0, y = var_3733_cast_fp16_0)[name = string("attn_weights_193_cast_fp16")]; fp16 _inversed_attn_weights_195_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_195_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_195_cast_fp16 = mul(x = attn_weights_193_cast_fp16, y = _inversed_attn_weights_195_y_0_to_fp16)[name = string("_inversed_attn_weights_195_cast_fp16")]; tensor attn_weights_197_cast_fp16 = add(x = _inversed_attn_weights_195_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_197_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(2)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_3740, x = attn_weights_197_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool var_3746_transpose_x_1 = const()[name = string("op_3746_transpose_x_1"), val = bool(true)]; bool var_3746_transpose_y_1 = const()[name = string("op_3746_transpose_y_1"), val = bool(false)]; tensor var_3746_cast_fp16 = matmul(transpose_x = var_3746_transpose_x_1, transpose_y = var_3746_transpose_y_1, x = attn_weights_199_cast_fp16, y = var_3730_cast_fp16_0)[name = string("op_3746_cast_fp16")]; bool attn_weights_201_transpose_x_0 = const()[name = string("attn_weights_201_transpose_x_0"), val = bool(false)]; bool attn_weights_201_transpose_y_0 = const()[name = string("attn_weights_201_transpose_y_0"), val = bool(false)]; tensor attn_weights_201_cast_fp16 = matmul(transpose_x = attn_weights_201_transpose_x_0, transpose_y = attn_weights_201_transpose_y_0, x = var_3720_cast_fp16_1, y = var_3733_cast_fp16_1)[name = string("attn_weights_201_cast_fp16")]; fp16 _inversed_attn_weights_203_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_203_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_203_cast_fp16 = mul(x = attn_weights_201_cast_fp16, y = _inversed_attn_weights_203_y_0_to_fp16)[name = string("_inversed_attn_weights_203_cast_fp16")]; tensor attn_weights_205_cast_fp16 = add(x = _inversed_attn_weights_203_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_205_cast_fp16")]; int32 var_3752 = const()[name = string("op_3752"), val = int32(2)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_3752, x = attn_weights_205_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(true)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_207_cast_fp16, y = var_3730_cast_fp16_1)[name = string("attn_output_73_cast_fp16")]; int32 var_3760 = const()[name = string("op_3760"), val = int32(1)]; bool attn_output_75_interleave_0 = const()[name = string("attn_output_75_interleave_0"), val = bool(false)]; tensor attn_output_75_cast_fp16 = concat(axis = var_3760, interleave = attn_output_75_interleave_0, values = (var_3746_cast_fp16, attn_output_73_cast_fp16))[name = string("attn_output_75_cast_fp16")]; tensor var_3764_perm_0 = const()[name = string("op_3764_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([1, 1024, 1, 16])]; tensor var_3764_cast_fp16 = transpose(perm = var_3764_perm_0, x = attn_output_75_cast_fp16)[name = string("transpose_33")]; tensor x_221_cast_fp16 = reshape(shape = var_3769, x = var_3764_cast_fp16)[name = string("x_221_cast_fp16")]; string hidden_states_75_pad_type_0 = const()[name = string("hidden_states_75_pad_type_0"), val = string("valid")]; tensor hidden_states_75_strides_0 = const()[name = string("hidden_states_75_strides_0"), val = tensor([1, 1])]; tensor hidden_states_75_pad_0 = const()[name = string("hidden_states_75_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_75_dilations_0 = const()[name = string("hidden_states_75_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_75_groups_0 = const()[name = string("hidden_states_75_groups_0"), val = int32(1)]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369745216)))]; tensor hidden_states_75_cast_fp16 = conv(dilations = hidden_states_75_dilations_0, groups = hidden_states_75_groups_0, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = hidden_states_75_strides_0, weight = var_3776_to_fp16, x = x_221_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor x_223_cast_fp16 = add(x = x_215_cast_fp16, y = hidden_states_75_cast_fp16)[name = string("x_223_cast_fp16")]; int32 var_3788 = const()[name = string("op_3788"), val = int32(1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3791_cast_fp16 = mul(x = x_223_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3791_cast_fp16")]; bool x_225_interleave_0 = const()[name = string("x_225_interleave_0"), val = bool(false)]; tensor x_225_cast_fp16 = concat(axis = var_3788, interleave = x_225_interleave_0, values = (x_223_cast_fp16, var_3791_cast_fp16))[name = string("x_225_cast_fp16")]; tensor out_151_axes_0 = const()[name = string("out_151_axes_0"), val = tensor([1])]; fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_151_cast_fp16 = layer_norm(axes = out_151_axes_0, epsilon = var_3801_to_fp16, x = x_225_cast_fp16)[name = string("out_151_cast_fp16")]; tensor layer_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371842432)))]; tensor out_153_cast_fp16 = mul(x = out_151_cast_fp16, y = layer_layers_12_post_attention_layernorm_weight_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3807_split_sizes_0 = const()[name = string("op_3807_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3807_axis_0 = const()[name = string("op_3807_axis_0"), val = int32(1)]; tensor var_3807_cast_fp16_0, tensor var_3807_cast_fp16_1 = split(axis = var_3807_axis_0, split_sizes = var_3807_split_sizes_0, x = out_153_cast_fp16)[name = string("op_3807_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371846592)))]; tensor input_25_cast_fp16 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = var_3812_to_fp16, x = var_3807_cast_fp16_0)[name = string("input_25_cast_fp16")]; tensor var_3823_cast_fp16 = silu(x = input_25_cast_fp16)[name = string("op_3823_cast_fp16")]; string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")]; tensor var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor([1, 1])]; tensor var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor([1, 1])]; int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)]; tensor var_3811_to_fp16 = const()[name = string("op_3811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380235264)))]; tensor var_3828_cast_fp16 = conv(dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = var_3811_to_fp16, x = var_3807_cast_fp16_0)[name = string("op_3828_cast_fp16")]; tensor x_231_cast_fp16 = mul(x = var_3823_cast_fp16, y = var_3828_cast_fp16)[name = string("x_231_cast_fp16")]; string hidden_states_77_pad_type_0 = const()[name = string("hidden_states_77_pad_type_0"), val = string("valid")]; tensor hidden_states_77_strides_0 = const()[name = string("hidden_states_77_strides_0"), val = tensor([1, 1])]; tensor hidden_states_77_pad_0 = const()[name = string("hidden_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_77_dilations_0 = const()[name = string("hidden_states_77_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_77_groups_0 = const()[name = string("hidden_states_77_groups_0"), val = int32(1)]; tensor var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388623936)))]; tensor hidden_states_77_cast_fp16 = conv(dilations = hidden_states_77_dilations_0, groups = hidden_states_77_groups_0, pad = hidden_states_77_pad_0, pad_type = hidden_states_77_pad_type_0, strides = hidden_states_77_strides_0, weight = var_3810_to_fp16, x = x_231_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_223_cast_fp16, y = hidden_states_77_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_3841 = const()[name = string("op_3841"), val = int32(1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x_233_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool x_235_interleave_0 = const()[name = string("x_235_interleave_0"), val = bool(false)]; tensor x_235_cast_fp16 = concat(axis = var_3841, interleave = x_235_interleave_0, values = (x_233_cast_fp16, var_3844_cast_fp16))[name = string("x_235_cast_fp16")]; tensor out_157_axes_0 = const()[name = string("out_157_axes_0"), val = tensor([1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_157_cast_fp16 = layer_norm(axes = out_157_axes_0, epsilon = var_3854_to_fp16, x = x_235_cast_fp16)[name = string("out_157_cast_fp16")]; tensor layer_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397012608)))]; tensor out_159_cast_fp16 = mul(x = out_157_cast_fp16, y = layer_layers_13_input_layernorm_weight_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_3860_split_sizes_0 = const()[name = string("op_3860_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3860_axis_0 = const()[name = string("op_3860_axis_0"), val = int32(1)]; tensor var_3860_cast_fp16_0, tensor var_3860_cast_fp16_1 = split(axis = var_3860_axis_0, split_sizes = var_3860_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3860_cast_fp16")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397016768)))]; tensor query_states_53_cast_fp16 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = var_3882_to_fp16, x = var_3860_cast_fp16_0)[name = string("query_states_53_cast_fp16")]; string key_states_53_pad_type_0 = const()[name = string("key_states_53_pad_type_0"), val = string("valid")]; tensor key_states_53_strides_0 = const()[name = string("key_states_53_strides_0"), val = tensor([1, 1])]; tensor key_states_53_pad_0 = const()[name = string("key_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_53_dilations_0 = const()[name = string("key_states_53_dilations_0"), val = tensor([1, 1])]; int32 key_states_53_groups_0 = const()[name = string("key_states_53_groups_0"), val = int32(1)]; tensor var_3893_to_fp16 = const()[name = string("op_3893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399113984)))]; tensor key_states_53_cast_fp16 = conv(dilations = key_states_53_dilations_0, groups = key_states_53_groups_0, pad = key_states_53_pad_0, pad_type = key_states_53_pad_type_0, strides = key_states_53_strides_0, weight = var_3893_to_fp16, x = var_3860_cast_fp16_0)[name = string("key_states_53_cast_fp16")]; string value_states_53_pad_type_0 = const()[name = string("value_states_53_pad_type_0"), val = string("valid")]; tensor value_states_53_strides_0 = const()[name = string("value_states_53_strides_0"), val = tensor([1, 1])]; tensor value_states_53_pad_0 = const()[name = string("value_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_53_dilations_0 = const()[name = string("value_states_53_dilations_0"), val = tensor([1, 1])]; int32 value_states_53_groups_0 = const()[name = string("value_states_53_groups_0"), val = int32(1)]; tensor var_3904_to_fp16 = const()[name = string("op_3904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399376192)))]; tensor value_states_53_cast_fp16 = conv(dilations = value_states_53_dilations_0, groups = value_states_53_groups_0, pad = value_states_53_pad_0, pad_type = value_states_53_pad_type_0, strides = value_states_53_strides_0, weight = var_3904_to_fp16, x = var_3860_cast_fp16_0)[name = string("value_states_53_cast_fp16")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([1, 16, 64, 16])]; tensor embed_53_cast_fp16 = reshape(shape = var_3912, x = query_states_53_cast_fp16)[name = string("embed_53_cast_fp16")]; tensor var_3916 = const()[name = string("op_3916"), val = tensor([1, 2, 64, 16])]; tensor var_3917_cast_fp16 = reshape(shape = var_3916, x = key_states_53_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor embed_55_perm_0 = const()[name = string("embed_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2, 64, 16])]; tensor var_3924_cast_fp16 = reshape(shape = var_3923, x = value_states_53_cast_fp16)[name = string("op_3924_cast_fp16")]; tensor value_states_55_perm_0 = const()[name = string("value_states_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3928_cast_fp16 = mul(x = embed_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor var_3929_split_sizes_0 = const()[name = string("op_3929_split_sizes_0"), val = tensor([32, 32])]; int32 var_3929_axis_0 = const()[name = string("op_3929_axis_0"), val = int32(-2)]; tensor var_3929_cast_fp16_0, tensor var_3929_cast_fp16_1 = split(axis = var_3929_axis_0, split_sizes = var_3929_split_sizes_0, x = embed_53_cast_fp16)[name = string("op_3929_cast_fp16")]; fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3931_cast_fp16 = mul(x = var_3929_cast_fp16_1, y = const_137_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; int32 var_3933 = const()[name = string("op_3933"), val = int32(-2)]; bool var_3934_interleave_0 = const()[name = string("op_3934_interleave_0"), val = bool(false)]; tensor var_3934_cast_fp16 = concat(axis = var_3933, interleave = var_3934_interleave_0, values = (var_3931_cast_fp16, var_3929_cast_fp16_0))[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = var_3934_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_3928_cast_fp16, y = var_3935_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor embed_55_cast_fp16 = transpose(perm = embed_55_perm_0, x = var_3917_cast_fp16)[name = string("transpose_32")]; tensor var_3938_cast_fp16 = mul(x = embed_55_cast_fp16, y = cos_cast_fp16)[name = string("op_3938_cast_fp16")]; tensor var_3939_split_sizes_0 = const()[name = string("op_3939_split_sizes_0"), val = tensor([32, 32])]; int32 var_3939_axis_0 = const()[name = string("op_3939_axis_0"), val = int32(-1)]; tensor var_3939_cast_fp16_0, tensor var_3939_cast_fp16_1 = split(axis = var_3939_axis_0, split_sizes = var_3939_split_sizes_0, x = embed_55_cast_fp16)[name = string("op_3939_cast_fp16")]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3941_cast_fp16 = mul(x = var_3939_cast_fp16_1, y = const_138_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; bool var_3944_interleave_0 = const()[name = string("op_3944_interleave_0"), val = bool(false)]; tensor var_3944_cast_fp16 = concat(axis = var_3943, interleave = var_3944_interleave_0, values = (var_3941_cast_fp16, var_3939_cast_fp16_0))[name = string("op_3944_cast_fp16")]; tensor var_3945_cast_fp16 = mul(x = var_3944_cast_fp16, y = sin_cast_fp16)[name = string("op_3945_cast_fp16")]; tensor key_states_55_cast_fp16 = add(x = var_3938_cast_fp16, y = var_3945_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor expand_dims_131 = const()[name = string("expand_dims_131"), val = tensor([13])]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_134 = const()[name = string("expand_dims_134"), val = tensor([14])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_131, expand_dims_132, position_id, concat_107_values3_0))[name = string("concat_107")]; tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (expand_dims_134, concat_108_values1_0, var_426, concat_108_values3_0))[name = string("concat_108")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = key_states_55_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_288_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_288")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_55_cast_fp16 = transpose(perm = value_states_55_perm_0, x = var_3924_cast_fp16)[name = string("transpose_31")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = value_states_55_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_289_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_289")]; tensor var_3988_begin_0 = const()[name = string("op_3988_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3988_end_0 = const()[name = string("op_3988_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3988_end_mask_0 = const()[name = string("op_3988_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = coreml_update_state_74)[name = string("op_3988_cast_fp16")]; tensor tile_26 = const()[name = string("tile_26"), val = tensor([1, 1])]; int32 var_3991_axis_0 = const()[name = string("op_3991_axis_0"), val = int32(1)]; tensor var_3991_cast_fp16_0, tensor var_3991_cast_fp16_1 = split(axis = var_3991_axis_0, split_sizes = tile_26, x = var_3988_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor var_3998_begin_0 = const()[name = string("op_3998_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3998_end_0 = const()[name = string("op_3998_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3998_end_mask_0 = const()[name = string("op_3998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3998_cast_fp16 = slice_by_index(begin = var_3998_begin_0, end = var_3998_end_0, end_mask = var_3998_end_mask_0, x = coreml_update_state_75)[name = string("op_3998_cast_fp16")]; tensor tile_27 = const()[name = string("tile_27"), val = tensor([1, 1])]; int32 var_4001_axis_0 = const()[name = string("op_4001_axis_0"), val = int32(1)]; tensor var_4001_cast_fp16_0, tensor var_4001_cast_fp16_1 = split(axis = var_4001_axis_0, split_sizes = tile_27, x = var_3998_cast_fp16)[name = string("op_4001_cast_fp16")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([8, 8])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(1)]; tensor var_4004_cast_fp16_0, tensor var_4004_cast_fp16_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = query_states_55_cast_fp16)[name = string("op_4004_cast_fp16")]; bool attn_weights_209_transpose_x_0 = const()[name = string("attn_weights_209_transpose_x_0"), val = bool(false)]; bool attn_weights_209_transpose_y_0 = const()[name = string("attn_weights_209_transpose_y_0"), val = bool(false)]; tensor attn_weights_209_cast_fp16 = matmul(transpose_x = attn_weights_209_transpose_x_0, transpose_y = attn_weights_209_transpose_y_0, x = var_3991_cast_fp16_0, y = var_4004_cast_fp16_0)[name = string("attn_weights_209_cast_fp16")]; fp16 _inversed_attn_weights_211_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_211_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_211_cast_fp16 = mul(x = attn_weights_209_cast_fp16, y = _inversed_attn_weights_211_y_0_to_fp16)[name = string("_inversed_attn_weights_211_cast_fp16")]; tensor attn_weights_213_cast_fp16 = add(x = _inversed_attn_weights_211_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_213_cast_fp16")]; int32 var_4011 = const()[name = string("op_4011"), val = int32(2)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_4011, x = attn_weights_213_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool var_4017_transpose_x_1 = const()[name = string("op_4017_transpose_x_1"), val = bool(true)]; bool var_4017_transpose_y_1 = const()[name = string("op_4017_transpose_y_1"), val = bool(false)]; tensor var_4017_cast_fp16 = matmul(transpose_x = var_4017_transpose_x_1, transpose_y = var_4017_transpose_y_1, x = attn_weights_215_cast_fp16, y = var_4001_cast_fp16_0)[name = string("op_4017_cast_fp16")]; bool attn_weights_217_transpose_x_0 = const()[name = string("attn_weights_217_transpose_x_0"), val = bool(false)]; bool attn_weights_217_transpose_y_0 = const()[name = string("attn_weights_217_transpose_y_0"), val = bool(false)]; tensor attn_weights_217_cast_fp16 = matmul(transpose_x = attn_weights_217_transpose_x_0, transpose_y = attn_weights_217_transpose_y_0, x = var_3991_cast_fp16_1, y = var_4004_cast_fp16_1)[name = string("attn_weights_217_cast_fp16")]; fp16 _inversed_attn_weights_219_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_219_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_219_cast_fp16 = mul(x = attn_weights_217_cast_fp16, y = _inversed_attn_weights_219_y_0_to_fp16)[name = string("_inversed_attn_weights_219_cast_fp16")]; tensor attn_weights_221_cast_fp16 = add(x = _inversed_attn_weights_219_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_221_cast_fp16")]; int32 var_4023 = const()[name = string("op_4023"), val = int32(2)]; tensor attn_weights_223_cast_fp16 = softmax(axis = var_4023, x = attn_weights_221_cast_fp16)[name = string("attn_weights_223_cast_fp16")]; bool attn_output_79_transpose_x_1 = const()[name = string("attn_output_79_transpose_x_1"), val = bool(true)]; bool attn_output_79_transpose_y_1 = const()[name = string("attn_output_79_transpose_y_1"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_1, transpose_y = attn_output_79_transpose_y_1, x = attn_weights_223_cast_fp16, y = var_4001_cast_fp16_1)[name = string("attn_output_79_cast_fp16")]; int32 var_4031 = const()[name = string("op_4031"), val = int32(1)]; bool attn_output_81_interleave_0 = const()[name = string("attn_output_81_interleave_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = concat(axis = var_4031, interleave = attn_output_81_interleave_0, values = (var_4017_cast_fp16, attn_output_79_cast_fp16))[name = string("attn_output_81_cast_fp16")]; tensor var_4035_perm_0 = const()[name = string("op_4035_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 1024, 1, 16])]; tensor var_4035_cast_fp16 = transpose(perm = var_4035_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_30")]; tensor x_239_cast_fp16 = reshape(shape = var_4040, x = var_4035_cast_fp16)[name = string("x_239_cast_fp16")]; string hidden_states_81_pad_type_0 = const()[name = string("hidden_states_81_pad_type_0"), val = string("valid")]; tensor hidden_states_81_strides_0 = const()[name = string("hidden_states_81_strides_0"), val = tensor([1, 1])]; tensor hidden_states_81_pad_0 = const()[name = string("hidden_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_81_dilations_0 = const()[name = string("hidden_states_81_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_81_groups_0 = const()[name = string("hidden_states_81_groups_0"), val = int32(1)]; tensor var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399638400)))]; tensor hidden_states_81_cast_fp16 = conv(dilations = hidden_states_81_dilations_0, groups = hidden_states_81_groups_0, pad = hidden_states_81_pad_0, pad_type = hidden_states_81_pad_type_0, strides = hidden_states_81_strides_0, weight = var_4047_to_fp16, x = x_239_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_81_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(1)]; fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4062_cast_fp16 = mul(x = x_241_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; bool x_243_interleave_0 = const()[name = string("x_243_interleave_0"), val = bool(false)]; tensor x_243_cast_fp16 = concat(axis = var_4059, interleave = x_243_interleave_0, values = (x_241_cast_fp16, var_4062_cast_fp16))[name = string("x_243_cast_fp16")]; tensor out_163_axes_0 = const()[name = string("out_163_axes_0"), val = tensor([1])]; fp16 var_4072_to_fp16 = const()[name = string("op_4072_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_163_cast_fp16 = layer_norm(axes = out_163_axes_0, epsilon = var_4072_to_fp16, x = x_243_cast_fp16)[name = string("out_163_cast_fp16")]; tensor layer_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401735616)))]; tensor out_165_cast_fp16 = mul(x = out_163_cast_fp16, y = layer_layers_13_post_attention_layernorm_weight_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_4078_split_sizes_0 = const()[name = string("op_4078_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4078_axis_0 = const()[name = string("op_4078_axis_0"), val = int32(1)]; tensor var_4078_cast_fp16_0, tensor var_4078_cast_fp16_1 = split(axis = var_4078_axis_0, split_sizes = var_4078_split_sizes_0, x = out_165_cast_fp16)[name = string("op_4078_cast_fp16")]; string input_27_pad_type_0 = const()[name = string("input_27_pad_type_0"), val = string("valid")]; tensor input_27_strides_0 = const()[name = string("input_27_strides_0"), val = tensor([1, 1])]; tensor input_27_pad_0 = const()[name = string("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_27_dilations_0 = const()[name = string("input_27_dilations_0"), val = tensor([1, 1])]; int32 input_27_groups_0 = const()[name = string("input_27_groups_0"), val = int32(1)]; tensor var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401739776)))]; tensor input_27_cast_fp16 = conv(dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = var_4083_to_fp16, x = var_4078_cast_fp16_0)[name = string("input_27_cast_fp16")]; tensor var_4094_cast_fp16 = silu(x = input_27_cast_fp16)[name = string("op_4094_cast_fp16")]; string var_4099_pad_type_0 = const()[name = string("op_4099_pad_type_0"), val = string("valid")]; tensor var_4099_strides_0 = const()[name = string("op_4099_strides_0"), val = tensor([1, 1])]; tensor var_4099_pad_0 = const()[name = string("op_4099_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4099_dilations_0 = const()[name = string("op_4099_dilations_0"), val = tensor([1, 1])]; int32 var_4099_groups_0 = const()[name = string("op_4099_groups_0"), val = int32(1)]; tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410128448)))]; tensor var_4099_cast_fp16 = conv(dilations = var_4099_dilations_0, groups = var_4099_groups_0, pad = var_4099_pad_0, pad_type = var_4099_pad_type_0, strides = var_4099_strides_0, weight = var_4082_to_fp16, x = var_4078_cast_fp16_0)[name = string("op_4099_cast_fp16")]; tensor x_249_cast_fp16 = mul(x = var_4094_cast_fp16, y = var_4099_cast_fp16)[name = string("x_249_cast_fp16")]; string hidden_states_83_pad_type_0 = const()[name = string("hidden_states_83_pad_type_0"), val = string("valid")]; tensor hidden_states_83_strides_0 = const()[name = string("hidden_states_83_strides_0"), val = tensor([1, 1])]; tensor hidden_states_83_pad_0 = const()[name = string("hidden_states_83_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_83_dilations_0 = const()[name = string("hidden_states_83_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_83_groups_0 = const()[name = string("hidden_states_83_groups_0"), val = int32(1)]; tensor var_4081_to_fp16 = const()[name = string("op_4081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418517120)))]; tensor hidden_states_83_cast_fp16 = conv(dilations = hidden_states_83_dilations_0, groups = hidden_states_83_groups_0, pad = hidden_states_83_pad_0, pad_type = hidden_states_83_pad_type_0, strides = hidden_states_83_strides_0, weight = var_4081_to_fp16, x = x_249_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_4112 = const()[name = string("op_4112"), val = int32(1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4115_cast_fp16 = mul(x = x_251_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4115_cast_fp16")]; bool x_253_interleave_0 = const()[name = string("x_253_interleave_0"), val = bool(false)]; tensor x_253_cast_fp16 = concat(axis = var_4112, interleave = x_253_interleave_0, values = (x_251_cast_fp16, var_4115_cast_fp16))[name = string("x_253_cast_fp16")]; tensor out_169_axes_0 = const()[name = string("out_169_axes_0"), val = tensor([1])]; fp16 var_4125_to_fp16 = const()[name = string("op_4125_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_169_cast_fp16 = layer_norm(axes = out_169_axes_0, epsilon = var_4125_to_fp16, x = x_253_cast_fp16)[name = string("out_169_cast_fp16")]; tensor layer_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426905792)))]; tensor out_171_cast_fp16 = mul(x = out_169_cast_fp16, y = layer_layers_14_input_layernorm_weight_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_4131_split_sizes_0 = const()[name = string("op_4131_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4131_axis_0 = const()[name = string("op_4131_axis_0"), val = int32(1)]; tensor var_4131_cast_fp16_0, tensor var_4131_cast_fp16_1 = split(axis = var_4131_axis_0, split_sizes = var_4131_split_sizes_0, x = out_171_cast_fp16)[name = string("op_4131_cast_fp16")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426909952)))]; tensor query_states_57_cast_fp16 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = var_4153_to_fp16, x = var_4131_cast_fp16_0)[name = string("query_states_57_cast_fp16")]; string key_states_57_pad_type_0 = const()[name = string("key_states_57_pad_type_0"), val = string("valid")]; tensor key_states_57_strides_0 = const()[name = string("key_states_57_strides_0"), val = tensor([1, 1])]; tensor key_states_57_pad_0 = const()[name = string("key_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_57_dilations_0 = const()[name = string("key_states_57_dilations_0"), val = tensor([1, 1])]; int32 key_states_57_groups_0 = const()[name = string("key_states_57_groups_0"), val = int32(1)]; tensor var_4164_to_fp16 = const()[name = string("op_4164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429007168)))]; tensor key_states_57_cast_fp16 = conv(dilations = key_states_57_dilations_0, groups = key_states_57_groups_0, pad = key_states_57_pad_0, pad_type = key_states_57_pad_type_0, strides = key_states_57_strides_0, weight = var_4164_to_fp16, x = var_4131_cast_fp16_0)[name = string("key_states_57_cast_fp16")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor var_4175_to_fp16 = const()[name = string("op_4175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429269376)))]; tensor value_states_57_cast_fp16 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = var_4175_to_fp16, x = var_4131_cast_fp16_0)[name = string("value_states_57_cast_fp16")]; tensor var_4183 = const()[name = string("op_4183"), val = tensor([1, 16, 64, 16])]; tensor embed_57_cast_fp16 = reshape(shape = var_4183, x = query_states_57_cast_fp16)[name = string("embed_57_cast_fp16")]; tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 2, 64, 16])]; tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = key_states_57_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor embed_59_perm_0 = const()[name = string("embed_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4194 = const()[name = string("op_4194"), val = tensor([1, 2, 64, 16])]; tensor var_4195_cast_fp16 = reshape(shape = var_4194, x = value_states_57_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor value_states_59_perm_0 = const()[name = string("value_states_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4199_cast_fp16 = mul(x = embed_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor var_4200_split_sizes_0 = const()[name = string("op_4200_split_sizes_0"), val = tensor([32, 32])]; int32 var_4200_axis_0 = const()[name = string("op_4200_axis_0"), val = int32(-2)]; tensor var_4200_cast_fp16_0, tensor var_4200_cast_fp16_1 = split(axis = var_4200_axis_0, split_sizes = var_4200_split_sizes_0, x = embed_57_cast_fp16)[name = string("op_4200_cast_fp16")]; fp16 const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4202_cast_fp16 = mul(x = var_4200_cast_fp16_1, y = const_147_promoted_to_fp16)[name = string("op_4202_cast_fp16")]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-2)]; bool var_4205_interleave_0 = const()[name = string("op_4205_interleave_0"), val = bool(false)]; tensor var_4205_cast_fp16 = concat(axis = var_4204, interleave = var_4205_interleave_0, values = (var_4202_cast_fp16, var_4200_cast_fp16_0))[name = string("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = mul(x = var_4205_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4206_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4206_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor embed_59_cast_fp16 = transpose(perm = embed_59_perm_0, x = var_4188_cast_fp16)[name = string("transpose_29")]; tensor var_4209_cast_fp16 = mul(x = embed_59_cast_fp16, y = cos_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor var_4210_split_sizes_0 = const()[name = string("op_4210_split_sizes_0"), val = tensor([32, 32])]; int32 var_4210_axis_0 = const()[name = string("op_4210_axis_0"), val = int32(-1)]; tensor var_4210_cast_fp16_0, tensor var_4210_cast_fp16_1 = split(axis = var_4210_axis_0, split_sizes = var_4210_split_sizes_0, x = embed_59_cast_fp16)[name = string("op_4210_cast_fp16")]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4212_cast_fp16 = mul(x = var_4210_cast_fp16_1, y = const_148_promoted_to_fp16)[name = string("op_4212_cast_fp16")]; int32 var_4214 = const()[name = string("op_4214"), val = int32(-1)]; bool var_4215_interleave_0 = const()[name = string("op_4215_interleave_0"), val = bool(false)]; tensor var_4215_cast_fp16 = concat(axis = var_4214, interleave = var_4215_interleave_0, values = (var_4212_cast_fp16, var_4210_cast_fp16_0))[name = string("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = mul(x = var_4215_cast_fp16, y = sin_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor key_states_59_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4216_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([14])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([0])]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([15])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_141, expand_dims_142, position_id, concat_115_values3_0))[name = string("concat_115")]; tensor concat_116_values1_0 = const()[name = string("concat_116_values1_0"), val = tensor([0])]; tensor concat_116_values3_0 = const()[name = string("concat_116_values3_0"), val = tensor([0])]; int32 concat_116_axis_0 = const()[name = string("concat_116_axis_0"), val = int32(0)]; bool concat_116_interleave_0 = const()[name = string("concat_116_interleave_0"), val = bool(false)]; tensor concat_116 = concat(axis = concat_116_axis_0, interleave = concat_116_interleave_0, values = (expand_dims_144, concat_116_values1_0, var_426, concat_116_values3_0))[name = string("concat_116")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = key_states_59_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_290_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_290")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59_cast_fp16 = transpose(perm = value_states_59_perm_0, x = var_4195_cast_fp16)[name = string("transpose_28")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = value_states_59_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_291_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_291")]; tensor var_4259_begin_0 = const()[name = string("op_4259_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4259_end_0 = const()[name = string("op_4259_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4259_end_mask_0 = const()[name = string("op_4259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4259_cast_fp16 = slice_by_index(begin = var_4259_begin_0, end = var_4259_end_0, end_mask = var_4259_end_mask_0, x = coreml_update_state_76)[name = string("op_4259_cast_fp16")]; tensor tile_28 = const()[name = string("tile_28"), val = tensor([1, 1])]; int32 var_4262_axis_0 = const()[name = string("op_4262_axis_0"), val = int32(1)]; tensor var_4262_cast_fp16_0, tensor var_4262_cast_fp16_1 = split(axis = var_4262_axis_0, split_sizes = tile_28, x = var_4259_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor var_4269_begin_0 = const()[name = string("op_4269_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = string("op_4269_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4269_end_mask_0 = const()[name = string("op_4269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = coreml_update_state_77)[name = string("op_4269_cast_fp16")]; tensor tile_29 = const()[name = string("tile_29"), val = tensor([1, 1])]; int32 var_4272_axis_0 = const()[name = string("op_4272_axis_0"), val = int32(1)]; tensor var_4272_cast_fp16_0, tensor var_4272_cast_fp16_1 = split(axis = var_4272_axis_0, split_sizes = tile_29, x = var_4269_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4275_split_sizes_0 = const()[name = string("op_4275_split_sizes_0"), val = tensor([8, 8])]; int32 var_4275_axis_0 = const()[name = string("op_4275_axis_0"), val = int32(1)]; tensor var_4275_cast_fp16_0, tensor var_4275_cast_fp16_1 = split(axis = var_4275_axis_0, split_sizes = var_4275_split_sizes_0, x = query_states_59_cast_fp16)[name = string("op_4275_cast_fp16")]; bool attn_weights_225_transpose_x_0 = const()[name = string("attn_weights_225_transpose_x_0"), val = bool(false)]; bool attn_weights_225_transpose_y_0 = const()[name = string("attn_weights_225_transpose_y_0"), val = bool(false)]; tensor attn_weights_225_cast_fp16 = matmul(transpose_x = attn_weights_225_transpose_x_0, transpose_y = attn_weights_225_transpose_y_0, x = var_4262_cast_fp16_0, y = var_4275_cast_fp16_0)[name = string("attn_weights_225_cast_fp16")]; fp16 _inversed_attn_weights_227_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_227_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_227_cast_fp16 = mul(x = attn_weights_225_cast_fp16, y = _inversed_attn_weights_227_y_0_to_fp16)[name = string("_inversed_attn_weights_227_cast_fp16")]; tensor attn_weights_229_cast_fp16 = add(x = _inversed_attn_weights_227_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_229_cast_fp16")]; int32 var_4282 = const()[name = string("op_4282"), val = int32(2)]; tensor attn_weights_231_cast_fp16 = softmax(axis = var_4282, x = attn_weights_229_cast_fp16)[name = string("attn_weights_231_cast_fp16")]; bool var_4288_transpose_x_1 = const()[name = string("op_4288_transpose_x_1"), val = bool(true)]; bool var_4288_transpose_y_1 = const()[name = string("op_4288_transpose_y_1"), val = bool(false)]; tensor var_4288_cast_fp16 = matmul(transpose_x = var_4288_transpose_x_1, transpose_y = var_4288_transpose_y_1, x = attn_weights_231_cast_fp16, y = var_4272_cast_fp16_0)[name = string("op_4288_cast_fp16")]; bool attn_weights_233_transpose_x_0 = const()[name = string("attn_weights_233_transpose_x_0"), val = bool(false)]; bool attn_weights_233_transpose_y_0 = const()[name = string("attn_weights_233_transpose_y_0"), val = bool(false)]; tensor attn_weights_233_cast_fp16 = matmul(transpose_x = attn_weights_233_transpose_x_0, transpose_y = attn_weights_233_transpose_y_0, x = var_4262_cast_fp16_1, y = var_4275_cast_fp16_1)[name = string("attn_weights_233_cast_fp16")]; fp16 _inversed_attn_weights_235_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_235_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_235_cast_fp16 = mul(x = attn_weights_233_cast_fp16, y = _inversed_attn_weights_235_y_0_to_fp16)[name = string("_inversed_attn_weights_235_cast_fp16")]; tensor attn_weights_237_cast_fp16 = add(x = _inversed_attn_weights_235_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_237_cast_fp16")]; int32 var_4294 = const()[name = string("op_4294"), val = int32(2)]; tensor attn_weights_239_cast_fp16 = softmax(axis = var_4294, x = attn_weights_237_cast_fp16)[name = string("attn_weights_239_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(true)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_239_cast_fp16, y = var_4272_cast_fp16_1)[name = string("attn_output_85_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(1)]; bool attn_output_87_interleave_0 = const()[name = string("attn_output_87_interleave_0"), val = bool(false)]; tensor attn_output_87_cast_fp16 = concat(axis = var_4302, interleave = attn_output_87_interleave_0, values = (var_4288_cast_fp16, attn_output_85_cast_fp16))[name = string("attn_output_87_cast_fp16")]; tensor var_4306_perm_0 = const()[name = string("op_4306_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4311 = const()[name = string("op_4311"), val = tensor([1, 1024, 1, 16])]; tensor var_4306_cast_fp16 = transpose(perm = var_4306_perm_0, x = attn_output_87_cast_fp16)[name = string("transpose_27")]; tensor x_257_cast_fp16 = reshape(shape = var_4311, x = var_4306_cast_fp16)[name = string("x_257_cast_fp16")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429531584)))]; tensor hidden_states_87_cast_fp16 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = var_4318_to_fp16, x = x_257_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_251_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(1)]; fp16 const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4333_cast_fp16 = mul(x = x_259_cast_fp16, y = const_153_promoted_to_fp16)[name = string("op_4333_cast_fp16")]; bool x_261_interleave_0 = const()[name = string("x_261_interleave_0"), val = bool(false)]; tensor x_261_cast_fp16 = concat(axis = var_4330, interleave = x_261_interleave_0, values = (x_259_cast_fp16, var_4333_cast_fp16))[name = string("x_261_cast_fp16")]; tensor out_175_axes_0 = const()[name = string("out_175_axes_0"), val = tensor([1])]; fp16 var_4343_to_fp16 = const()[name = string("op_4343_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_175_cast_fp16 = layer_norm(axes = out_175_axes_0, epsilon = var_4343_to_fp16, x = x_261_cast_fp16)[name = string("out_175_cast_fp16")]; tensor layer_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431628800)))]; tensor out_177_cast_fp16 = mul(x = out_175_cast_fp16, y = layer_layers_14_post_attention_layernorm_weight_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_4349_split_sizes_0 = const()[name = string("op_4349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4349_axis_0 = const()[name = string("op_4349_axis_0"), val = int32(1)]; tensor var_4349_cast_fp16_0, tensor var_4349_cast_fp16_1 = split(axis = var_4349_axis_0, split_sizes = var_4349_split_sizes_0, x = out_177_cast_fp16)[name = string("op_4349_cast_fp16")]; string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431632960)))]; tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = var_4354_to_fp16, x = var_4349_cast_fp16_0)[name = string("input_29_cast_fp16")]; tensor var_4365_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_4365_cast_fp16")]; string var_4370_pad_type_0 = const()[name = string("op_4370_pad_type_0"), val = string("valid")]; tensor var_4370_strides_0 = const()[name = string("op_4370_strides_0"), val = tensor([1, 1])]; tensor var_4370_pad_0 = const()[name = string("op_4370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4370_dilations_0 = const()[name = string("op_4370_dilations_0"), val = tensor([1, 1])]; int32 var_4370_groups_0 = const()[name = string("op_4370_groups_0"), val = int32(1)]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440021632)))]; tensor var_4370_cast_fp16 = conv(dilations = var_4370_dilations_0, groups = var_4370_groups_0, pad = var_4370_pad_0, pad_type = var_4370_pad_type_0, strides = var_4370_strides_0, weight = var_4353_to_fp16, x = var_4349_cast_fp16_0)[name = string("op_4370_cast_fp16")]; tensor x_267_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4370_cast_fp16)[name = string("x_267_cast_fp16")]; string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; tensor var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448410304)))]; tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = var_4352_to_fp16, x = x_267_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor x_269_cast_fp16 = add(x = x_259_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("x_269_cast_fp16")]; int32 var_4383 = const()[name = string("op_4383"), val = int32(1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool x_271_interleave_0 = const()[name = string("x_271_interleave_0"), val = bool(false)]; tensor x_271_cast_fp16 = concat(axis = var_4383, interleave = x_271_interleave_0, values = (x_269_cast_fp16, var_4386_cast_fp16))[name = string("x_271_cast_fp16")]; tensor out_181_axes_0 = const()[name = string("out_181_axes_0"), val = tensor([1])]; fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_181_cast_fp16 = layer_norm(axes = out_181_axes_0, epsilon = var_4396_to_fp16, x = x_271_cast_fp16)[name = string("out_181_cast_fp16")]; tensor layer_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456798976)))]; tensor out_183_cast_fp16 = mul(x = out_181_cast_fp16, y = layer_layers_15_input_layernorm_weight_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_4402_split_sizes_0 = const()[name = string("op_4402_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4402_axis_0 = const()[name = string("op_4402_axis_0"), val = int32(1)]; tensor var_4402_cast_fp16_0, tensor var_4402_cast_fp16_1 = split(axis = var_4402_axis_0, split_sizes = var_4402_split_sizes_0, x = out_183_cast_fp16)[name = string("op_4402_cast_fp16")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456803136)))]; tensor query_states_61_cast_fp16 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = var_4424_to_fp16, x = var_4402_cast_fp16_0)[name = string("query_states_61_cast_fp16")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458900352)))]; tensor key_states_61_cast_fp16 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = var_4435_to_fp16, x = var_4402_cast_fp16_0)[name = string("key_states_61_cast_fp16")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459162560)))]; tensor value_states_61_cast_fp16 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = var_4446_to_fp16, x = var_4402_cast_fp16_0)[name = string("value_states_61_cast_fp16")]; tensor var_4454 = const()[name = string("op_4454"), val = tensor([1, 16, 64, 16])]; tensor embed_61_cast_fp16 = reshape(shape = var_4454, x = query_states_61_cast_fp16)[name = string("embed_61_cast_fp16")]; tensor var_4458 = const()[name = string("op_4458"), val = tensor([1, 2, 64, 16])]; tensor var_4459_cast_fp16 = reshape(shape = var_4458, x = key_states_61_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor embed_63_perm_0 = const()[name = string("embed_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4465 = const()[name = string("op_4465"), val = tensor([1, 2, 64, 16])]; tensor var_4466_cast_fp16 = reshape(shape = var_4465, x = value_states_61_cast_fp16)[name = string("op_4466_cast_fp16")]; tensor value_states_63_perm_0 = const()[name = string("value_states_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4470_cast_fp16 = mul(x = embed_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4471_split_sizes_0 = const()[name = string("op_4471_split_sizes_0"), val = tensor([32, 32])]; int32 var_4471_axis_0 = const()[name = string("op_4471_axis_0"), val = int32(-2)]; tensor var_4471_cast_fp16_0, tensor var_4471_cast_fp16_1 = split(axis = var_4471_axis_0, split_sizes = var_4471_split_sizes_0, x = embed_61_cast_fp16)[name = string("op_4471_cast_fp16")]; fp16 const_157_promoted_to_fp16 = const()[name = string("const_157_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4473_cast_fp16 = mul(x = var_4471_cast_fp16_1, y = const_157_promoted_to_fp16)[name = string("op_4473_cast_fp16")]; int32 var_4475 = const()[name = string("op_4475"), val = int32(-2)]; bool var_4476_interleave_0 = const()[name = string("op_4476_interleave_0"), val = bool(false)]; tensor var_4476_cast_fp16 = concat(axis = var_4475, interleave = var_4476_interleave_0, values = (var_4473_cast_fp16, var_4471_cast_fp16_0))[name = string("op_4476_cast_fp16")]; tensor var_4477_cast_fp16 = mul(x = var_4476_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4477_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_4470_cast_fp16, y = var_4477_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor embed_63_cast_fp16 = transpose(perm = embed_63_perm_0, x = var_4459_cast_fp16)[name = string("transpose_26")]; tensor var_4480_cast_fp16 = mul(x = embed_63_cast_fp16, y = cos_cast_fp16)[name = string("op_4480_cast_fp16")]; tensor var_4481_split_sizes_0 = const()[name = string("op_4481_split_sizes_0"), val = tensor([32, 32])]; int32 var_4481_axis_0 = const()[name = string("op_4481_axis_0"), val = int32(-1)]; tensor var_4481_cast_fp16_0, tensor var_4481_cast_fp16_1 = split(axis = var_4481_axis_0, split_sizes = var_4481_split_sizes_0, x = embed_63_cast_fp16)[name = string("op_4481_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4483_cast_fp16 = mul(x = var_4481_cast_fp16_1, y = const_158_promoted_to_fp16)[name = string("op_4483_cast_fp16")]; int32 var_4485 = const()[name = string("op_4485"), val = int32(-1)]; bool var_4486_interleave_0 = const()[name = string("op_4486_interleave_0"), val = bool(false)]; tensor var_4486_cast_fp16 = concat(axis = var_4485, interleave = var_4486_interleave_0, values = (var_4483_cast_fp16, var_4481_cast_fp16_0))[name = string("op_4486_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = var_4486_cast_fp16, y = sin_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor key_states_63_cast_fp16 = add(x = var_4480_cast_fp16, y = var_4487_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([15])]; tensor expand_dims_152 = const()[name = string("expand_dims_152"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([16])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_151, expand_dims_152, position_id, concat_123_values3_0))[name = string("concat_123")]; tensor concat_124_values1_0 = const()[name = string("concat_124_values1_0"), val = tensor([0])]; tensor concat_124_values3_0 = const()[name = string("concat_124_values3_0"), val = tensor([0])]; int32 concat_124_axis_0 = const()[name = string("concat_124_axis_0"), val = int32(0)]; bool concat_124_interleave_0 = const()[name = string("concat_124_interleave_0"), val = bool(false)]; tensor concat_124 = concat(axis = concat_124_axis_0, interleave = concat_124_interleave_0, values = (expand_dims_154, concat_124_values1_0, var_426, concat_124_values3_0))[name = string("concat_124")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_292_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_292")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63_cast_fp16 = transpose(perm = value_states_63_perm_0, x = var_4466_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = value_states_63_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_293_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_293")]; tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_78)[name = string("op_4530_cast_fp16")]; tensor tile_30 = const()[name = string("tile_30"), val = tensor([1, 1])]; int32 var_4533_axis_0 = const()[name = string("op_4533_axis_0"), val = int32(1)]; tensor var_4533_cast_fp16_0, tensor var_4533_cast_fp16_1 = split(axis = var_4533_axis_0, split_sizes = tile_30, x = var_4530_cast_fp16)[name = string("op_4533_cast_fp16")]; tensor var_4540_begin_0 = const()[name = string("op_4540_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4540_end_0 = const()[name = string("op_4540_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4540_end_mask_0 = const()[name = string("op_4540_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4540_cast_fp16 = slice_by_index(begin = var_4540_begin_0, end = var_4540_end_0, end_mask = var_4540_end_mask_0, x = coreml_update_state_79)[name = string("op_4540_cast_fp16")]; tensor tile_31 = const()[name = string("tile_31"), val = tensor([1, 1])]; int32 var_4543_axis_0 = const()[name = string("op_4543_axis_0"), val = int32(1)]; tensor var_4543_cast_fp16_0, tensor var_4543_cast_fp16_1 = split(axis = var_4543_axis_0, split_sizes = tile_31, x = var_4540_cast_fp16)[name = string("op_4543_cast_fp16")]; tensor var_4546_split_sizes_0 = const()[name = string("op_4546_split_sizes_0"), val = tensor([8, 8])]; int32 var_4546_axis_0 = const()[name = string("op_4546_axis_0"), val = int32(1)]; tensor var_4546_cast_fp16_0, tensor var_4546_cast_fp16_1 = split(axis = var_4546_axis_0, split_sizes = var_4546_split_sizes_0, x = query_states_63_cast_fp16)[name = string("op_4546_cast_fp16")]; bool attn_weights_241_transpose_x_0 = const()[name = string("attn_weights_241_transpose_x_0"), val = bool(false)]; bool attn_weights_241_transpose_y_0 = const()[name = string("attn_weights_241_transpose_y_0"), val = bool(false)]; tensor attn_weights_241_cast_fp16 = matmul(transpose_x = attn_weights_241_transpose_x_0, transpose_y = attn_weights_241_transpose_y_0, x = var_4533_cast_fp16_0, y = var_4546_cast_fp16_0)[name = string("attn_weights_241_cast_fp16")]; fp16 _inversed_attn_weights_243_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_243_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_243_cast_fp16 = mul(x = attn_weights_241_cast_fp16, y = _inversed_attn_weights_243_y_0_to_fp16)[name = string("_inversed_attn_weights_243_cast_fp16")]; tensor attn_weights_245_cast_fp16 = add(x = _inversed_attn_weights_243_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_245_cast_fp16")]; int32 var_4553 = const()[name = string("op_4553"), val = int32(2)]; tensor attn_weights_247_cast_fp16 = softmax(axis = var_4553, x = attn_weights_245_cast_fp16)[name = string("attn_weights_247_cast_fp16")]; bool var_4559_transpose_x_1 = const()[name = string("op_4559_transpose_x_1"), val = bool(true)]; bool var_4559_transpose_y_1 = const()[name = string("op_4559_transpose_y_1"), val = bool(false)]; tensor var_4559_cast_fp16 = matmul(transpose_x = var_4559_transpose_x_1, transpose_y = var_4559_transpose_y_1, x = attn_weights_247_cast_fp16, y = var_4543_cast_fp16_0)[name = string("op_4559_cast_fp16")]; bool attn_weights_249_transpose_x_0 = const()[name = string("attn_weights_249_transpose_x_0"), val = bool(false)]; bool attn_weights_249_transpose_y_0 = const()[name = string("attn_weights_249_transpose_y_0"), val = bool(false)]; tensor attn_weights_249_cast_fp16 = matmul(transpose_x = attn_weights_249_transpose_x_0, transpose_y = attn_weights_249_transpose_y_0, x = var_4533_cast_fp16_1, y = var_4546_cast_fp16_1)[name = string("attn_weights_249_cast_fp16")]; fp16 _inversed_attn_weights_251_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_251_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_251_cast_fp16 = mul(x = attn_weights_249_cast_fp16, y = _inversed_attn_weights_251_y_0_to_fp16)[name = string("_inversed_attn_weights_251_cast_fp16")]; tensor attn_weights_253_cast_fp16 = add(x = _inversed_attn_weights_251_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_253_cast_fp16")]; int32 var_4565 = const()[name = string("op_4565"), val = int32(2)]; tensor attn_weights_255_cast_fp16 = softmax(axis = var_4565, x = attn_weights_253_cast_fp16)[name = string("attn_weights_255_cast_fp16")]; bool attn_output_91_transpose_x_1 = const()[name = string("attn_output_91_transpose_x_1"), val = bool(true)]; bool attn_output_91_transpose_y_1 = const()[name = string("attn_output_91_transpose_y_1"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_1, transpose_y = attn_output_91_transpose_y_1, x = attn_weights_255_cast_fp16, y = var_4543_cast_fp16_1)[name = string("attn_output_91_cast_fp16")]; int32 var_4573 = const()[name = string("op_4573"), val = int32(1)]; bool attn_output_93_interleave_0 = const()[name = string("attn_output_93_interleave_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = concat(axis = var_4573, interleave = attn_output_93_interleave_0, values = (var_4559_cast_fp16, attn_output_91_cast_fp16))[name = string("attn_output_93_cast_fp16")]; tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 1024, 1, 16])]; tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; tensor x_275_cast_fp16 = reshape(shape = var_4582, x = var_4577_cast_fp16)[name = string("x_275_cast_fp16")]; string hidden_states_93_pad_type_0 = const()[name = string("hidden_states_93_pad_type_0"), val = string("valid")]; tensor hidden_states_93_strides_0 = const()[name = string("hidden_states_93_strides_0"), val = tensor([1, 1])]; tensor hidden_states_93_pad_0 = const()[name = string("hidden_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_93_dilations_0 = const()[name = string("hidden_states_93_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_93_groups_0 = const()[name = string("hidden_states_93_groups_0"), val = int32(1)]; tensor var_4589_to_fp16 = const()[name = string("op_4589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459424768)))]; tensor hidden_states_93_cast_fp16 = conv(dilations = hidden_states_93_dilations_0, groups = hidden_states_93_groups_0, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = hidden_states_93_strides_0, weight = var_4589_to_fp16, x = x_275_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_269_cast_fp16, y = hidden_states_93_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_4601 = const()[name = string("op_4601"), val = int32(1)]; fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4604_cast_fp16 = mul(x = x_277_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_4604_cast_fp16")]; bool x_279_interleave_0 = const()[name = string("x_279_interleave_0"), val = bool(false)]; tensor x_279_cast_fp16 = concat(axis = var_4601, interleave = x_279_interleave_0, values = (x_277_cast_fp16, var_4604_cast_fp16))[name = string("x_279_cast_fp16")]; tensor out_187_axes_0 = const()[name = string("out_187_axes_0"), val = tensor([1])]; fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_187_cast_fp16 = layer_norm(axes = out_187_axes_0, epsilon = var_4614_to_fp16, x = x_279_cast_fp16)[name = string("out_187_cast_fp16")]; tensor layer_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461521984)))]; tensor out_189_cast_fp16 = mul(x = out_187_cast_fp16, y = layer_layers_15_post_attention_layernorm_weight_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_4620_split_sizes_0 = const()[name = string("op_4620_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4620_axis_0 = const()[name = string("op_4620_axis_0"), val = int32(1)]; tensor var_4620_cast_fp16_0, tensor var_4620_cast_fp16_1 = split(axis = var_4620_axis_0, split_sizes = var_4620_split_sizes_0, x = out_189_cast_fp16)[name = string("op_4620_cast_fp16")]; string input_31_pad_type_0 = const()[name = string("input_31_pad_type_0"), val = string("valid")]; tensor input_31_strides_0 = const()[name = string("input_31_strides_0"), val = tensor([1, 1])]; tensor input_31_pad_0 = const()[name = string("input_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_31_dilations_0 = const()[name = string("input_31_dilations_0"), val = tensor([1, 1])]; int32 input_31_groups_0 = const()[name = string("input_31_groups_0"), val = int32(1)]; tensor var_4625_to_fp16 = const()[name = string("op_4625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461526144)))]; tensor input_31_cast_fp16 = conv(dilations = input_31_dilations_0, groups = input_31_groups_0, pad = input_31_pad_0, pad_type = input_31_pad_type_0, strides = input_31_strides_0, weight = var_4625_to_fp16, x = var_4620_cast_fp16_0)[name = string("input_31_cast_fp16")]; tensor var_4636_cast_fp16 = silu(x = input_31_cast_fp16)[name = string("op_4636_cast_fp16")]; string var_4641_pad_type_0 = const()[name = string("op_4641_pad_type_0"), val = string("valid")]; tensor var_4641_strides_0 = const()[name = string("op_4641_strides_0"), val = tensor([1, 1])]; tensor var_4641_pad_0 = const()[name = string("op_4641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_dilations_0 = const()[name = string("op_4641_dilations_0"), val = tensor([1, 1])]; int32 var_4641_groups_0 = const()[name = string("op_4641_groups_0"), val = int32(1)]; tensor var_4624_to_fp16 = const()[name = string("op_4624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469914816)))]; tensor var_4641_cast_fp16 = conv(dilations = var_4641_dilations_0, groups = var_4641_groups_0, pad = var_4641_pad_0, pad_type = var_4641_pad_type_0, strides = var_4641_strides_0, weight = var_4624_to_fp16, x = var_4620_cast_fp16_0)[name = string("op_4641_cast_fp16")]; tensor x_285_cast_fp16 = mul(x = var_4636_cast_fp16, y = var_4641_cast_fp16)[name = string("x_285_cast_fp16")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478303488)))]; tensor hidden_states_95_cast_fp16 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = var_4623_to_fp16, x = x_285_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor x_287_cast_fp16 = add(x = x_277_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("x_287_cast_fp16")]; int32 var_4654 = const()[name = string("op_4654"), val = int32(1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4657_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4657_cast_fp16")]; bool x_289_interleave_0 = const()[name = string("x_289_interleave_0"), val = bool(false)]; tensor x_289_cast_fp16 = concat(axis = var_4654, interleave = x_289_interleave_0, values = (x_287_cast_fp16, var_4657_cast_fp16))[name = string("x_289_cast_fp16")]; tensor out_193_axes_0 = const()[name = string("out_193_axes_0"), val = tensor([1])]; fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_193_cast_fp16 = layer_norm(axes = out_193_axes_0, epsilon = var_4667_to_fp16, x = x_289_cast_fp16)[name = string("out_193_cast_fp16")]; tensor layer_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486692160)))]; tensor out_195_cast_fp16 = mul(x = out_193_cast_fp16, y = layer_layers_16_input_layernorm_weight_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_4673_split_sizes_0 = const()[name = string("op_4673_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4673_axis_0 = const()[name = string("op_4673_axis_0"), val = int32(1)]; tensor var_4673_cast_fp16_0, tensor var_4673_cast_fp16_1 = split(axis = var_4673_axis_0, split_sizes = var_4673_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4673_cast_fp16")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486696320)))]; tensor query_states_65_cast_fp16 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = var_4695_to_fp16, x = var_4673_cast_fp16_0)[name = string("query_states_65_cast_fp16")]; string key_states_65_pad_type_0 = const()[name = string("key_states_65_pad_type_0"), val = string("valid")]; tensor key_states_65_strides_0 = const()[name = string("key_states_65_strides_0"), val = tensor([1, 1])]; tensor key_states_65_pad_0 = const()[name = string("key_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_65_dilations_0 = const()[name = string("key_states_65_dilations_0"), val = tensor([1, 1])]; int32 key_states_65_groups_0 = const()[name = string("key_states_65_groups_0"), val = int32(1)]; tensor var_4706_to_fp16 = const()[name = string("op_4706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488793536)))]; tensor key_states_65_cast_fp16 = conv(dilations = key_states_65_dilations_0, groups = key_states_65_groups_0, pad = key_states_65_pad_0, pad_type = key_states_65_pad_type_0, strides = key_states_65_strides_0, weight = var_4706_to_fp16, x = var_4673_cast_fp16_0)[name = string("key_states_65_cast_fp16")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489055744)))]; tensor value_states_65_cast_fp16 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = var_4717_to_fp16, x = var_4673_cast_fp16_0)[name = string("value_states_65_cast_fp16")]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 16, 64, 16])]; tensor embed_65_cast_fp16 = reshape(shape = var_4725, x = query_states_65_cast_fp16)[name = string("embed_65_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 2, 64, 16])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = key_states_65_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor embed_67_perm_0 = const()[name = string("embed_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2, 64, 16])]; tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = value_states_65_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor value_states_67_perm_0 = const()[name = string("value_states_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4741_cast_fp16 = mul(x = embed_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4741_cast_fp16")]; tensor var_4742_split_sizes_0 = const()[name = string("op_4742_split_sizes_0"), val = tensor([32, 32])]; int32 var_4742_axis_0 = const()[name = string("op_4742_axis_0"), val = int32(-2)]; tensor var_4742_cast_fp16_0, tensor var_4742_cast_fp16_1 = split(axis = var_4742_axis_0, split_sizes = var_4742_split_sizes_0, x = embed_65_cast_fp16)[name = string("op_4742_cast_fp16")]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4744_cast_fp16 = mul(x = var_4742_cast_fp16_1, y = const_167_promoted_to_fp16)[name = string("op_4744_cast_fp16")]; int32 var_4746 = const()[name = string("op_4746"), val = int32(-2)]; bool var_4747_interleave_0 = const()[name = string("op_4747_interleave_0"), val = bool(false)]; tensor var_4747_cast_fp16 = concat(axis = var_4746, interleave = var_4747_interleave_0, values = (var_4744_cast_fp16, var_4742_cast_fp16_0))[name = string("op_4747_cast_fp16")]; tensor var_4748_cast_fp16 = mul(x = var_4747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_4741_cast_fp16, y = var_4748_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor embed_67_cast_fp16 = transpose(perm = embed_67_perm_0, x = var_4730_cast_fp16)[name = string("transpose_23")]; tensor var_4751_cast_fp16 = mul(x = embed_67_cast_fp16, y = cos_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4752_split_sizes_0 = const()[name = string("op_4752_split_sizes_0"), val = tensor([32, 32])]; int32 var_4752_axis_0 = const()[name = string("op_4752_axis_0"), val = int32(-1)]; tensor var_4752_cast_fp16_0, tensor var_4752_cast_fp16_1 = split(axis = var_4752_axis_0, split_sizes = var_4752_split_sizes_0, x = embed_67_cast_fp16)[name = string("op_4752_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4754_cast_fp16 = mul(x = var_4752_cast_fp16_1, y = const_168_promoted_to_fp16)[name = string("op_4754_cast_fp16")]; int32 var_4756 = const()[name = string("op_4756"), val = int32(-1)]; bool var_4757_interleave_0 = const()[name = string("op_4757_interleave_0"), val = bool(false)]; tensor var_4757_cast_fp16 = concat(axis = var_4756, interleave = var_4757_interleave_0, values = (var_4754_cast_fp16, var_4752_cast_fp16_0))[name = string("op_4757_cast_fp16")]; tensor var_4758_cast_fp16 = mul(x = var_4757_cast_fp16, y = sin_cast_fp16)[name = string("op_4758_cast_fp16")]; tensor key_states_67_cast_fp16 = add(x = var_4751_cast_fp16, y = var_4758_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor expand_dims_161 = const()[name = string("expand_dims_161"), val = tensor([16])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_164 = const()[name = string("expand_dims_164"), val = tensor([17])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_161, expand_dims_162, position_id, concat_131_values3_0))[name = string("concat_131")]; tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_164, concat_132_values1_0, var_426, concat_132_values3_0))[name = string("concat_132")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = key_states_67_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_294_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_294")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67_cast_fp16 = transpose(perm = value_states_67_perm_0, x = var_4737_cast_fp16)[name = string("transpose_22")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = value_states_67_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_295_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_295")]; tensor var_4801_begin_0 = const()[name = string("op_4801_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4801_end_0 = const()[name = string("op_4801_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4801_end_mask_0 = const()[name = string("op_4801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4801_cast_fp16 = slice_by_index(begin = var_4801_begin_0, end = var_4801_end_0, end_mask = var_4801_end_mask_0, x = coreml_update_state_80)[name = string("op_4801_cast_fp16")]; tensor tile_32 = const()[name = string("tile_32"), val = tensor([1, 1])]; int32 var_4804_axis_0 = const()[name = string("op_4804_axis_0"), val = int32(1)]; tensor var_4804_cast_fp16_0, tensor var_4804_cast_fp16_1 = split(axis = var_4804_axis_0, split_sizes = tile_32, x = var_4801_cast_fp16)[name = string("op_4804_cast_fp16")]; tensor var_4811_begin_0 = const()[name = string("op_4811_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4811_end_0 = const()[name = string("op_4811_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4811_end_mask_0 = const()[name = string("op_4811_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4811_cast_fp16 = slice_by_index(begin = var_4811_begin_0, end = var_4811_end_0, end_mask = var_4811_end_mask_0, x = coreml_update_state_81)[name = string("op_4811_cast_fp16")]; tensor tile_33 = const()[name = string("tile_33"), val = tensor([1, 1])]; int32 var_4814_axis_0 = const()[name = string("op_4814_axis_0"), val = int32(1)]; tensor var_4814_cast_fp16_0, tensor var_4814_cast_fp16_1 = split(axis = var_4814_axis_0, split_sizes = tile_33, x = var_4811_cast_fp16)[name = string("op_4814_cast_fp16")]; tensor var_4817_split_sizes_0 = const()[name = string("op_4817_split_sizes_0"), val = tensor([8, 8])]; int32 var_4817_axis_0 = const()[name = string("op_4817_axis_0"), val = int32(1)]; tensor var_4817_cast_fp16_0, tensor var_4817_cast_fp16_1 = split(axis = var_4817_axis_0, split_sizes = var_4817_split_sizes_0, x = query_states_67_cast_fp16)[name = string("op_4817_cast_fp16")]; bool attn_weights_257_transpose_x_0 = const()[name = string("attn_weights_257_transpose_x_0"), val = bool(false)]; bool attn_weights_257_transpose_y_0 = const()[name = string("attn_weights_257_transpose_y_0"), val = bool(false)]; tensor attn_weights_257_cast_fp16 = matmul(transpose_x = attn_weights_257_transpose_x_0, transpose_y = attn_weights_257_transpose_y_0, x = var_4804_cast_fp16_0, y = var_4817_cast_fp16_0)[name = string("attn_weights_257_cast_fp16")]; fp16 _inversed_attn_weights_259_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_259_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_259_cast_fp16 = mul(x = attn_weights_257_cast_fp16, y = _inversed_attn_weights_259_y_0_to_fp16)[name = string("_inversed_attn_weights_259_cast_fp16")]; tensor attn_weights_261_cast_fp16 = add(x = _inversed_attn_weights_259_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_261_cast_fp16")]; int32 var_4824 = const()[name = string("op_4824"), val = int32(2)]; tensor attn_weights_263_cast_fp16 = softmax(axis = var_4824, x = attn_weights_261_cast_fp16)[name = string("attn_weights_263_cast_fp16")]; bool var_4830_transpose_x_1 = const()[name = string("op_4830_transpose_x_1"), val = bool(true)]; bool var_4830_transpose_y_1 = const()[name = string("op_4830_transpose_y_1"), val = bool(false)]; tensor var_4830_cast_fp16 = matmul(transpose_x = var_4830_transpose_x_1, transpose_y = var_4830_transpose_y_1, x = attn_weights_263_cast_fp16, y = var_4814_cast_fp16_0)[name = string("op_4830_cast_fp16")]; bool attn_weights_265_transpose_x_0 = const()[name = string("attn_weights_265_transpose_x_0"), val = bool(false)]; bool attn_weights_265_transpose_y_0 = const()[name = string("attn_weights_265_transpose_y_0"), val = bool(false)]; tensor attn_weights_265_cast_fp16 = matmul(transpose_x = attn_weights_265_transpose_x_0, transpose_y = attn_weights_265_transpose_y_0, x = var_4804_cast_fp16_1, y = var_4817_cast_fp16_1)[name = string("attn_weights_265_cast_fp16")]; fp16 _inversed_attn_weights_267_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_267_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_267_cast_fp16 = mul(x = attn_weights_265_cast_fp16, y = _inversed_attn_weights_267_y_0_to_fp16)[name = string("_inversed_attn_weights_267_cast_fp16")]; tensor attn_weights_269_cast_fp16 = add(x = _inversed_attn_weights_267_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_269_cast_fp16")]; int32 var_4836 = const()[name = string("op_4836"), val = int32(2)]; tensor attn_weights_271_cast_fp16 = softmax(axis = var_4836, x = attn_weights_269_cast_fp16)[name = string("attn_weights_271_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(true)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_271_cast_fp16, y = var_4814_cast_fp16_1)[name = string("attn_output_97_cast_fp16")]; int32 var_4844 = const()[name = string("op_4844"), val = int32(1)]; bool attn_output_99_interleave_0 = const()[name = string("attn_output_99_interleave_0"), val = bool(false)]; tensor attn_output_99_cast_fp16 = concat(axis = var_4844, interleave = attn_output_99_interleave_0, values = (var_4830_cast_fp16, attn_output_97_cast_fp16))[name = string("attn_output_99_cast_fp16")]; tensor var_4848_perm_0 = const()[name = string("op_4848_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 1024, 1, 16])]; tensor var_4848_cast_fp16 = transpose(perm = var_4848_perm_0, x = attn_output_99_cast_fp16)[name = string("transpose_21")]; tensor x_293_cast_fp16 = reshape(shape = var_4853, x = var_4848_cast_fp16)[name = string("x_293_cast_fp16")]; string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489317952)))]; tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = var_4860_to_fp16, x = x_293_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor x_295_cast_fp16 = add(x = x_287_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("x_295_cast_fp16")]; int32 var_4872 = const()[name = string("op_4872"), val = int32(1)]; fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4875_cast_fp16 = mul(x = x_295_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; bool x_297_interleave_0 = const()[name = string("x_297_interleave_0"), val = bool(false)]; tensor x_297_cast_fp16 = concat(axis = var_4872, interleave = x_297_interleave_0, values = (x_295_cast_fp16, var_4875_cast_fp16))[name = string("x_297_cast_fp16")]; tensor out_199_axes_0 = const()[name = string("out_199_axes_0"), val = tensor([1])]; fp16 var_4885_to_fp16 = const()[name = string("op_4885_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_199_cast_fp16 = layer_norm(axes = out_199_axes_0, epsilon = var_4885_to_fp16, x = x_297_cast_fp16)[name = string("out_199_cast_fp16")]; tensor layer_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491415168)))]; tensor out_201_cast_fp16 = mul(x = out_199_cast_fp16, y = layer_layers_16_post_attention_layernorm_weight_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4891_split_sizes_0 = const()[name = string("op_4891_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(1)]; tensor var_4891_cast_fp16_0, tensor var_4891_cast_fp16_1 = split(axis = var_4891_axis_0, split_sizes = var_4891_split_sizes_0, x = out_201_cast_fp16)[name = string("op_4891_cast_fp16")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491419328)))]; tensor input_33_cast_fp16 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = var_4896_to_fp16, x = var_4891_cast_fp16_0)[name = string("input_33_cast_fp16")]; tensor var_4907_cast_fp16 = silu(x = input_33_cast_fp16)[name = string("op_4907_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4895_to_fp16 = const()[name = string("op_4895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499808000)))]; tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = var_4895_to_fp16, x = var_4891_cast_fp16_0)[name = string("op_4912_cast_fp16")]; tensor x_303_cast_fp16 = mul(x = var_4907_cast_fp16, y = var_4912_cast_fp16)[name = string("x_303_cast_fp16")]; string hidden_states_101_pad_type_0 = const()[name = string("hidden_states_101_pad_type_0"), val = string("valid")]; tensor hidden_states_101_strides_0 = const()[name = string("hidden_states_101_strides_0"), val = tensor([1, 1])]; tensor hidden_states_101_pad_0 = const()[name = string("hidden_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_101_dilations_0 = const()[name = string("hidden_states_101_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_101_groups_0 = const()[name = string("hidden_states_101_groups_0"), val = int32(1)]; tensor var_4894_to_fp16 = const()[name = string("op_4894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508196672)))]; tensor hidden_states_101_cast_fp16 = conv(dilations = hidden_states_101_dilations_0, groups = hidden_states_101_groups_0, pad = hidden_states_101_pad_0, pad_type = hidden_states_101_pad_type_0, strides = hidden_states_101_strides_0, weight = var_4894_to_fp16, x = x_303_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_295_cast_fp16, y = hidden_states_101_cast_fp16)[name = string("x_305_cast_fp16")]; int32 var_4925 = const()[name = string("op_4925"), val = int32(1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4928_cast_fp16 = mul(x = x_305_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_4928_cast_fp16")]; bool x_307_interleave_0 = const()[name = string("x_307_interleave_0"), val = bool(false)]; tensor x_307_cast_fp16 = concat(axis = var_4925, interleave = x_307_interleave_0, values = (x_305_cast_fp16, var_4928_cast_fp16))[name = string("x_307_cast_fp16")]; tensor out_205_axes_0 = const()[name = string("out_205_axes_0"), val = tensor([1])]; fp16 var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_205_cast_fp16 = layer_norm(axes = out_205_axes_0, epsilon = var_4938_to_fp16, x = x_307_cast_fp16)[name = string("out_205_cast_fp16")]; tensor layer_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516585344)))]; tensor out_207_cast_fp16 = mul(x = out_205_cast_fp16, y = layer_layers_17_input_layernorm_weight_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_4944_split_sizes_0 = const()[name = string("op_4944_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4944_axis_0 = const()[name = string("op_4944_axis_0"), val = int32(1)]; tensor var_4944_cast_fp16_0, tensor var_4944_cast_fp16_1 = split(axis = var_4944_axis_0, split_sizes = var_4944_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4944_cast_fp16")]; string query_states_69_pad_type_0 = const()[name = string("query_states_69_pad_type_0"), val = string("valid")]; tensor query_states_69_strides_0 = const()[name = string("query_states_69_strides_0"), val = tensor([1, 1])]; tensor query_states_69_pad_0 = const()[name = string("query_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_69_dilations_0 = const()[name = string("query_states_69_dilations_0"), val = tensor([1, 1])]; int32 query_states_69_groups_0 = const()[name = string("query_states_69_groups_0"), val = int32(1)]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516589504)))]; tensor query_states_69_cast_fp16 = conv(dilations = query_states_69_dilations_0, groups = query_states_69_groups_0, pad = query_states_69_pad_0, pad_type = query_states_69_pad_type_0, strides = query_states_69_strides_0, weight = var_4966_to_fp16, x = var_4944_cast_fp16_0)[name = string("query_states_69_cast_fp16")]; string key_states_69_pad_type_0 = const()[name = string("key_states_69_pad_type_0"), val = string("valid")]; tensor key_states_69_strides_0 = const()[name = string("key_states_69_strides_0"), val = tensor([1, 1])]; tensor key_states_69_pad_0 = const()[name = string("key_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_69_dilations_0 = const()[name = string("key_states_69_dilations_0"), val = tensor([1, 1])]; int32 key_states_69_groups_0 = const()[name = string("key_states_69_groups_0"), val = int32(1)]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518686720)))]; tensor key_states_69_cast_fp16 = conv(dilations = key_states_69_dilations_0, groups = key_states_69_groups_0, pad = key_states_69_pad_0, pad_type = key_states_69_pad_type_0, strides = key_states_69_strides_0, weight = var_4977_to_fp16, x = var_4944_cast_fp16_0)[name = string("key_states_69_cast_fp16")]; string value_states_69_pad_type_0 = const()[name = string("value_states_69_pad_type_0"), val = string("valid")]; tensor value_states_69_strides_0 = const()[name = string("value_states_69_strides_0"), val = tensor([1, 1])]; tensor value_states_69_pad_0 = const()[name = string("value_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_69_dilations_0 = const()[name = string("value_states_69_dilations_0"), val = tensor([1, 1])]; int32 value_states_69_groups_0 = const()[name = string("value_states_69_groups_0"), val = int32(1)]; tensor var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518948928)))]; tensor value_states_69_cast_fp16 = conv(dilations = value_states_69_dilations_0, groups = value_states_69_groups_0, pad = value_states_69_pad_0, pad_type = value_states_69_pad_type_0, strides = value_states_69_strides_0, weight = var_4988_to_fp16, x = var_4944_cast_fp16_0)[name = string("value_states_69_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 16, 64, 16])]; tensor embed_69_cast_fp16 = reshape(shape = var_4996, x = query_states_69_cast_fp16)[name = string("embed_69_cast_fp16")]; tensor var_5000 = const()[name = string("op_5000"), val = tensor([1, 2, 64, 16])]; tensor var_5001_cast_fp16 = reshape(shape = var_5000, x = key_states_69_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor embed_71_perm_0 = const()[name = string("embed_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 2, 64, 16])]; tensor var_5008_cast_fp16 = reshape(shape = var_5007, x = value_states_69_cast_fp16)[name = string("op_5008_cast_fp16")]; tensor value_states_71_perm_0 = const()[name = string("value_states_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5012_cast_fp16 = mul(x = embed_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor var_5013_split_sizes_0 = const()[name = string("op_5013_split_sizes_0"), val = tensor([32, 32])]; int32 var_5013_axis_0 = const()[name = string("op_5013_axis_0"), val = int32(-2)]; tensor var_5013_cast_fp16_0, tensor var_5013_cast_fp16_1 = split(axis = var_5013_axis_0, split_sizes = var_5013_split_sizes_0, x = embed_69_cast_fp16)[name = string("op_5013_cast_fp16")]; fp16 const_177_promoted_to_fp16 = const()[name = string("const_177_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5015_cast_fp16 = mul(x = var_5013_cast_fp16_1, y = const_177_promoted_to_fp16)[name = string("op_5015_cast_fp16")]; int32 var_5017 = const()[name = string("op_5017"), val = int32(-2)]; bool var_5018_interleave_0 = const()[name = string("op_5018_interleave_0"), val = bool(false)]; tensor var_5018_cast_fp16 = concat(axis = var_5017, interleave = var_5018_interleave_0, values = (var_5015_cast_fp16, var_5013_cast_fp16_0))[name = string("op_5018_cast_fp16")]; tensor var_5019_cast_fp16 = mul(x = var_5018_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_5012_cast_fp16, y = var_5019_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor embed_71_cast_fp16 = transpose(perm = embed_71_perm_0, x = var_5001_cast_fp16)[name = string("transpose_20")]; tensor var_5022_cast_fp16 = mul(x = embed_71_cast_fp16, y = cos_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([32, 32])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_cast_fp16_0, tensor var_5023_cast_fp16_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = embed_71_cast_fp16)[name = string("op_5023_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5025_cast_fp16 = mul(x = var_5023_cast_fp16_1, y = const_178_promoted_to_fp16)[name = string("op_5025_cast_fp16")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028_cast_fp16 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025_cast_fp16, var_5023_cast_fp16_0))[name = string("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = mul(x = var_5028_cast_fp16, y = sin_cast_fp16)[name = string("op_5029_cast_fp16")]; tensor key_states_71_cast_fp16 = add(x = var_5022_cast_fp16, y = var_5029_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([17])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([0])]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([18])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_171, expand_dims_172, position_id, concat_139_values3_0))[name = string("concat_139")]; tensor concat_140_values1_0 = const()[name = string("concat_140_values1_0"), val = tensor([0])]; tensor concat_140_values3_0 = const()[name = string("concat_140_values3_0"), val = tensor([0])]; int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (expand_dims_174, concat_140_values1_0, var_426, concat_140_values3_0))[name = string("concat_140")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = key_states_71_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_296_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_296")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_71_cast_fp16 = transpose(perm = value_states_71_perm_0, x = var_5008_cast_fp16)[name = string("transpose_19")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = value_states_71_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_297_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_297")]; tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_82)[name = string("op_5072_cast_fp16")]; tensor tile_34 = const()[name = string("tile_34"), val = tensor([1, 1])]; int32 var_5075_axis_0 = const()[name = string("op_5075_axis_0"), val = int32(1)]; tensor var_5075_cast_fp16_0, tensor var_5075_cast_fp16_1 = split(axis = var_5075_axis_0, split_sizes = tile_34, x = var_5072_cast_fp16)[name = string("op_5075_cast_fp16")]; tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = coreml_update_state_83)[name = string("op_5082_cast_fp16")]; tensor tile_35 = const()[name = string("tile_35"), val = tensor([1, 1])]; int32 var_5085_axis_0 = const()[name = string("op_5085_axis_0"), val = int32(1)]; tensor var_5085_cast_fp16_0, tensor var_5085_cast_fp16_1 = split(axis = var_5085_axis_0, split_sizes = tile_35, x = var_5082_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor var_5088_split_sizes_0 = const()[name = string("op_5088_split_sizes_0"), val = tensor([8, 8])]; int32 var_5088_axis_0 = const()[name = string("op_5088_axis_0"), val = int32(1)]; tensor var_5088_cast_fp16_0, tensor var_5088_cast_fp16_1 = split(axis = var_5088_axis_0, split_sizes = var_5088_split_sizes_0, x = query_states_71_cast_fp16)[name = string("op_5088_cast_fp16")]; bool attn_weights_273_transpose_x_0 = const()[name = string("attn_weights_273_transpose_x_0"), val = bool(false)]; bool attn_weights_273_transpose_y_0 = const()[name = string("attn_weights_273_transpose_y_0"), val = bool(false)]; tensor attn_weights_273_cast_fp16 = matmul(transpose_x = attn_weights_273_transpose_x_0, transpose_y = attn_weights_273_transpose_y_0, x = var_5075_cast_fp16_0, y = var_5088_cast_fp16_0)[name = string("attn_weights_273_cast_fp16")]; fp16 _inversed_attn_weights_275_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_275_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_275_cast_fp16 = mul(x = attn_weights_273_cast_fp16, y = _inversed_attn_weights_275_y_0_to_fp16)[name = string("_inversed_attn_weights_275_cast_fp16")]; tensor attn_weights_277_cast_fp16 = add(x = _inversed_attn_weights_275_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_277_cast_fp16")]; int32 var_5095 = const()[name = string("op_5095"), val = int32(2)]; tensor attn_weights_279_cast_fp16 = softmax(axis = var_5095, x = attn_weights_277_cast_fp16)[name = string("attn_weights_279_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(true)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(false)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = attn_weights_279_cast_fp16, y = var_5085_cast_fp16_0)[name = string("op_5101_cast_fp16")]; bool attn_weights_281_transpose_x_0 = const()[name = string("attn_weights_281_transpose_x_0"), val = bool(false)]; bool attn_weights_281_transpose_y_0 = const()[name = string("attn_weights_281_transpose_y_0"), val = bool(false)]; tensor attn_weights_281_cast_fp16 = matmul(transpose_x = attn_weights_281_transpose_x_0, transpose_y = attn_weights_281_transpose_y_0, x = var_5075_cast_fp16_1, y = var_5088_cast_fp16_1)[name = string("attn_weights_281_cast_fp16")]; fp16 _inversed_attn_weights_283_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_283_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_283_cast_fp16 = mul(x = attn_weights_281_cast_fp16, y = _inversed_attn_weights_283_y_0_to_fp16)[name = string("_inversed_attn_weights_283_cast_fp16")]; tensor attn_weights_285_cast_fp16 = add(x = _inversed_attn_weights_283_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_285_cast_fp16")]; int32 var_5107 = const()[name = string("op_5107"), val = int32(2)]; tensor attn_weights_287_cast_fp16 = softmax(axis = var_5107, x = attn_weights_285_cast_fp16)[name = string("attn_weights_287_cast_fp16")]; bool attn_output_103_transpose_x_1 = const()[name = string("attn_output_103_transpose_x_1"), val = bool(true)]; bool attn_output_103_transpose_y_1 = const()[name = string("attn_output_103_transpose_y_1"), val = bool(false)]; tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_1, transpose_y = attn_output_103_transpose_y_1, x = attn_weights_287_cast_fp16, y = var_5085_cast_fp16_1)[name = string("attn_output_103_cast_fp16")]; int32 var_5115 = const()[name = string("op_5115"), val = int32(1)]; bool attn_output_105_interleave_0 = const()[name = string("attn_output_105_interleave_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = concat(axis = var_5115, interleave = attn_output_105_interleave_0, values = (var_5101_cast_fp16, attn_output_103_cast_fp16))[name = string("attn_output_105_cast_fp16")]; tensor var_5119_perm_0 = const()[name = string("op_5119_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 1024, 1, 16])]; tensor var_5119_cast_fp16 = transpose(perm = var_5119_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_18")]; tensor x_311_cast_fp16 = reshape(shape = var_5124, x = var_5119_cast_fp16)[name = string("x_311_cast_fp16")]; string hidden_states_105_pad_type_0 = const()[name = string("hidden_states_105_pad_type_0"), val = string("valid")]; tensor hidden_states_105_strides_0 = const()[name = string("hidden_states_105_strides_0"), val = tensor([1, 1])]; tensor hidden_states_105_pad_0 = const()[name = string("hidden_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_105_dilations_0 = const()[name = string("hidden_states_105_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_105_groups_0 = const()[name = string("hidden_states_105_groups_0"), val = int32(1)]; tensor var_5131_to_fp16 = const()[name = string("op_5131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519211136)))]; tensor hidden_states_105_cast_fp16 = conv(dilations = hidden_states_105_dilations_0, groups = hidden_states_105_groups_0, pad = hidden_states_105_pad_0, pad_type = hidden_states_105_pad_type_0, strides = hidden_states_105_strides_0, weight = var_5131_to_fp16, x = x_311_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor x_313_cast_fp16 = add(x = x_305_cast_fp16, y = hidden_states_105_cast_fp16)[name = string("x_313_cast_fp16")]; int32 var_5143 = const()[name = string("op_5143"), val = int32(1)]; fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5146_cast_fp16 = mul(x = x_313_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_5146_cast_fp16")]; bool x_315_interleave_0 = const()[name = string("x_315_interleave_0"), val = bool(false)]; tensor x_315_cast_fp16 = concat(axis = var_5143, interleave = x_315_interleave_0, values = (x_313_cast_fp16, var_5146_cast_fp16))[name = string("x_315_cast_fp16")]; tensor out_211_axes_0 = const()[name = string("out_211_axes_0"), val = tensor([1])]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_211_cast_fp16 = layer_norm(axes = out_211_axes_0, epsilon = var_5156_to_fp16, x = x_315_cast_fp16)[name = string("out_211_cast_fp16")]; tensor layer_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521308352)))]; tensor out_213_cast_fp16 = mul(x = out_211_cast_fp16, y = layer_layers_17_post_attention_layernorm_weight_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_5162_split_sizes_0 = const()[name = string("op_5162_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5162_axis_0 = const()[name = string("op_5162_axis_0"), val = int32(1)]; tensor var_5162_cast_fp16_0, tensor var_5162_cast_fp16_1 = split(axis = var_5162_axis_0, split_sizes = var_5162_split_sizes_0, x = out_213_cast_fp16)[name = string("op_5162_cast_fp16")]; string input_35_pad_type_0 = const()[name = string("input_35_pad_type_0"), val = string("valid")]; tensor input_35_strides_0 = const()[name = string("input_35_strides_0"), val = tensor([1, 1])]; tensor input_35_pad_0 = const()[name = string("input_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_35_dilations_0 = const()[name = string("input_35_dilations_0"), val = tensor([1, 1])]; int32 input_35_groups_0 = const()[name = string("input_35_groups_0"), val = int32(1)]; tensor var_5167_to_fp16 = const()[name = string("op_5167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521312512)))]; tensor input_35_cast_fp16 = conv(dilations = input_35_dilations_0, groups = input_35_groups_0, pad = input_35_pad_0, pad_type = input_35_pad_type_0, strides = input_35_strides_0, weight = var_5167_to_fp16, x = var_5162_cast_fp16_0)[name = string("input_35_cast_fp16")]; tensor var_5178_cast_fp16 = silu(x = input_35_cast_fp16)[name = string("op_5178_cast_fp16")]; string var_5183_pad_type_0 = const()[name = string("op_5183_pad_type_0"), val = string("valid")]; tensor var_5183_strides_0 = const()[name = string("op_5183_strides_0"), val = tensor([1, 1])]; tensor var_5183_pad_0 = const()[name = string("op_5183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5183_dilations_0 = const()[name = string("op_5183_dilations_0"), val = tensor([1, 1])]; int32 var_5183_groups_0 = const()[name = string("op_5183_groups_0"), val = int32(1)]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529701184)))]; tensor var_5183_cast_fp16 = conv(dilations = var_5183_dilations_0, groups = var_5183_groups_0, pad = var_5183_pad_0, pad_type = var_5183_pad_type_0, strides = var_5183_strides_0, weight = var_5166_to_fp16, x = var_5162_cast_fp16_0)[name = string("op_5183_cast_fp16")]; tensor x_321_cast_fp16 = mul(x = var_5178_cast_fp16, y = var_5183_cast_fp16)[name = string("x_321_cast_fp16")]; string hidden_states_107_pad_type_0 = const()[name = string("hidden_states_107_pad_type_0"), val = string("valid")]; tensor hidden_states_107_strides_0 = const()[name = string("hidden_states_107_strides_0"), val = tensor([1, 1])]; tensor hidden_states_107_pad_0 = const()[name = string("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_107_dilations_0 = const()[name = string("hidden_states_107_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_107_groups_0 = const()[name = string("hidden_states_107_groups_0"), val = int32(1)]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538089856)))]; tensor hidden_states_107_cast_fp16 = conv(dilations = hidden_states_107_dilations_0, groups = hidden_states_107_groups_0, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = hidden_states_107_strides_0, weight = var_5165_to_fp16, x = x_321_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor x_323_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_107_cast_fp16)[name = string("x_323_cast_fp16")]; int32 var_5196 = const()[name = string("op_5196"), val = int32(1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5199_cast_fp16 = mul(x = x_323_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_5199_cast_fp16")]; bool x_325_interleave_0 = const()[name = string("x_325_interleave_0"), val = bool(false)]; tensor x_325_cast_fp16 = concat(axis = var_5196, interleave = x_325_interleave_0, values = (x_323_cast_fp16, var_5199_cast_fp16))[name = string("x_325_cast_fp16")]; tensor out_217_axes_0 = const()[name = string("out_217_axes_0"), val = tensor([1])]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_217_cast_fp16 = layer_norm(axes = out_217_axes_0, epsilon = var_5209_to_fp16, x = x_325_cast_fp16)[name = string("out_217_cast_fp16")]; tensor layer_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546478528)))]; tensor out_219_cast_fp16 = mul(x = out_217_cast_fp16, y = layer_layers_18_input_layernorm_weight_to_fp16)[name = string("out_219_cast_fp16")]; tensor var_5215_split_sizes_0 = const()[name = string("op_5215_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5215_axis_0 = const()[name = string("op_5215_axis_0"), val = int32(1)]; tensor var_5215_cast_fp16_0, tensor var_5215_cast_fp16_1 = split(axis = var_5215_axis_0, split_sizes = var_5215_split_sizes_0, x = out_219_cast_fp16)[name = string("op_5215_cast_fp16")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546482688)))]; tensor query_states_73_cast_fp16 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = var_5237_to_fp16, x = var_5215_cast_fp16_0)[name = string("query_states_73_cast_fp16")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548579904)))]; tensor key_states_73_cast_fp16 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = var_5248_to_fp16, x = var_5215_cast_fp16_0)[name = string("key_states_73_cast_fp16")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor var_5259_to_fp16 = const()[name = string("op_5259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548842112)))]; tensor value_states_73_cast_fp16 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = var_5259_to_fp16, x = var_5215_cast_fp16_0)[name = string("value_states_73_cast_fp16")]; tensor var_5267 = const()[name = string("op_5267"), val = tensor([1, 16, 64, 16])]; tensor embed_73_cast_fp16 = reshape(shape = var_5267, x = query_states_73_cast_fp16)[name = string("embed_73_cast_fp16")]; tensor var_5271 = const()[name = string("op_5271"), val = tensor([1, 2, 64, 16])]; tensor var_5272_cast_fp16 = reshape(shape = var_5271, x = key_states_73_cast_fp16)[name = string("op_5272_cast_fp16")]; tensor embed_75_perm_0 = const()[name = string("embed_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 64, 16])]; tensor var_5279_cast_fp16 = reshape(shape = var_5278, x = value_states_73_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor value_states_75_perm_0 = const()[name = string("value_states_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5283_cast_fp16 = mul(x = embed_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor var_5284_split_sizes_0 = const()[name = string("op_5284_split_sizes_0"), val = tensor([32, 32])]; int32 var_5284_axis_0 = const()[name = string("op_5284_axis_0"), val = int32(-2)]; tensor var_5284_cast_fp16_0, tensor var_5284_cast_fp16_1 = split(axis = var_5284_axis_0, split_sizes = var_5284_split_sizes_0, x = embed_73_cast_fp16)[name = string("op_5284_cast_fp16")]; fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5286_cast_fp16 = mul(x = var_5284_cast_fp16_1, y = const_187_promoted_to_fp16)[name = string("op_5286_cast_fp16")]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-2)]; bool var_5289_interleave_0 = const()[name = string("op_5289_interleave_0"), val = bool(false)]; tensor var_5289_cast_fp16 = concat(axis = var_5288, interleave = var_5289_interleave_0, values = (var_5286_cast_fp16, var_5284_cast_fp16_0))[name = string("op_5289_cast_fp16")]; tensor var_5290_cast_fp16 = mul(x = var_5289_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5290_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_5283_cast_fp16, y = var_5290_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor embed_75_cast_fp16 = transpose(perm = embed_75_perm_0, x = var_5272_cast_fp16)[name = string("transpose_17")]; tensor var_5293_cast_fp16 = mul(x = embed_75_cast_fp16, y = cos_cast_fp16)[name = string("op_5293_cast_fp16")]; tensor var_5294_split_sizes_0 = const()[name = string("op_5294_split_sizes_0"), val = tensor([32, 32])]; int32 var_5294_axis_0 = const()[name = string("op_5294_axis_0"), val = int32(-1)]; tensor var_5294_cast_fp16_0, tensor var_5294_cast_fp16_1 = split(axis = var_5294_axis_0, split_sizes = var_5294_split_sizes_0, x = embed_75_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5296_cast_fp16 = mul(x = var_5294_cast_fp16_1, y = const_188_promoted_to_fp16)[name = string("op_5296_cast_fp16")]; int32 var_5298 = const()[name = string("op_5298"), val = int32(-1)]; bool var_5299_interleave_0 = const()[name = string("op_5299_interleave_0"), val = bool(false)]; tensor var_5299_cast_fp16 = concat(axis = var_5298, interleave = var_5299_interleave_0, values = (var_5296_cast_fp16, var_5294_cast_fp16_0))[name = string("op_5299_cast_fp16")]; tensor var_5300_cast_fp16 = mul(x = var_5299_cast_fp16, y = sin_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor key_states_75_cast_fp16 = add(x = var_5293_cast_fp16, y = var_5300_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([18])]; tensor expand_dims_182 = const()[name = string("expand_dims_182"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([19])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_181, expand_dims_182, position_id, concat_147_values3_0))[name = string("concat_147")]; tensor concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = tensor([0])]; tensor concat_148_values3_0 = const()[name = string("concat_148_values3_0"), val = tensor([0])]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (expand_dims_184, concat_148_values1_0, var_426, concat_148_values3_0))[name = string("concat_148")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = key_states_75_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_298_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_298")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75_cast_fp16 = transpose(perm = value_states_75_perm_0, x = var_5279_cast_fp16)[name = string("transpose_16")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = value_states_75_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_299_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_299")]; tensor var_5343_begin_0 = const()[name = string("op_5343_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5343_end_0 = const()[name = string("op_5343_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5343_end_mask_0 = const()[name = string("op_5343_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5343_cast_fp16 = slice_by_index(begin = var_5343_begin_0, end = var_5343_end_0, end_mask = var_5343_end_mask_0, x = coreml_update_state_84)[name = string("op_5343_cast_fp16")]; tensor tile_36 = const()[name = string("tile_36"), val = tensor([1, 1])]; int32 var_5346_axis_0 = const()[name = string("op_5346_axis_0"), val = int32(1)]; tensor var_5346_cast_fp16_0, tensor var_5346_cast_fp16_1 = split(axis = var_5346_axis_0, split_sizes = tile_36, x = var_5343_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = coreml_update_state_85)[name = string("op_5353_cast_fp16")]; tensor tile_37 = const()[name = string("tile_37"), val = tensor([1, 1])]; int32 var_5356_axis_0 = const()[name = string("op_5356_axis_0"), val = int32(1)]; tensor var_5356_cast_fp16_0, tensor var_5356_cast_fp16_1 = split(axis = var_5356_axis_0, split_sizes = tile_37, x = var_5353_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_split_sizes_0 = const()[name = string("op_5359_split_sizes_0"), val = tensor([8, 8])]; int32 var_5359_axis_0 = const()[name = string("op_5359_axis_0"), val = int32(1)]; tensor var_5359_cast_fp16_0, tensor var_5359_cast_fp16_1 = split(axis = var_5359_axis_0, split_sizes = var_5359_split_sizes_0, x = query_states_75_cast_fp16)[name = string("op_5359_cast_fp16")]; bool attn_weights_289_transpose_x_0 = const()[name = string("attn_weights_289_transpose_x_0"), val = bool(false)]; bool attn_weights_289_transpose_y_0 = const()[name = string("attn_weights_289_transpose_y_0"), val = bool(false)]; tensor attn_weights_289_cast_fp16 = matmul(transpose_x = attn_weights_289_transpose_x_0, transpose_y = attn_weights_289_transpose_y_0, x = var_5346_cast_fp16_0, y = var_5359_cast_fp16_0)[name = string("attn_weights_289_cast_fp16")]; fp16 _inversed_attn_weights_291_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_291_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_291_cast_fp16 = mul(x = attn_weights_289_cast_fp16, y = _inversed_attn_weights_291_y_0_to_fp16)[name = string("_inversed_attn_weights_291_cast_fp16")]; tensor attn_weights_293_cast_fp16 = add(x = _inversed_attn_weights_291_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_293_cast_fp16")]; int32 var_5366 = const()[name = string("op_5366"), val = int32(2)]; tensor attn_weights_295_cast_fp16 = softmax(axis = var_5366, x = attn_weights_293_cast_fp16)[name = string("attn_weights_295_cast_fp16")]; bool var_5372_transpose_x_1 = const()[name = string("op_5372_transpose_x_1"), val = bool(true)]; bool var_5372_transpose_y_1 = const()[name = string("op_5372_transpose_y_1"), val = bool(false)]; tensor var_5372_cast_fp16 = matmul(transpose_x = var_5372_transpose_x_1, transpose_y = var_5372_transpose_y_1, x = attn_weights_295_cast_fp16, y = var_5356_cast_fp16_0)[name = string("op_5372_cast_fp16")]; bool attn_weights_297_transpose_x_0 = const()[name = string("attn_weights_297_transpose_x_0"), val = bool(false)]; bool attn_weights_297_transpose_y_0 = const()[name = string("attn_weights_297_transpose_y_0"), val = bool(false)]; tensor attn_weights_297_cast_fp16 = matmul(transpose_x = attn_weights_297_transpose_x_0, transpose_y = attn_weights_297_transpose_y_0, x = var_5346_cast_fp16_1, y = var_5359_cast_fp16_1)[name = string("attn_weights_297_cast_fp16")]; fp16 _inversed_attn_weights_299_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_299_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_299_cast_fp16 = mul(x = attn_weights_297_cast_fp16, y = _inversed_attn_weights_299_y_0_to_fp16)[name = string("_inversed_attn_weights_299_cast_fp16")]; tensor attn_weights_301_cast_fp16 = add(x = _inversed_attn_weights_299_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_301_cast_fp16")]; int32 var_5378 = const()[name = string("op_5378"), val = int32(2)]; tensor attn_weights_303_cast_fp16 = softmax(axis = var_5378, x = attn_weights_301_cast_fp16)[name = string("attn_weights_303_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(true)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_303_cast_fp16, y = var_5356_cast_fp16_1)[name = string("attn_output_109_cast_fp16")]; int32 var_5386 = const()[name = string("op_5386"), val = int32(1)]; bool attn_output_111_interleave_0 = const()[name = string("attn_output_111_interleave_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = concat(axis = var_5386, interleave = attn_output_111_interleave_0, values = (var_5372_cast_fp16, attn_output_109_cast_fp16))[name = string("attn_output_111_cast_fp16")]; tensor var_5390_perm_0 = const()[name = string("op_5390_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5395 = const()[name = string("op_5395"), val = tensor([1, 1024, 1, 16])]; tensor var_5390_cast_fp16 = transpose(perm = var_5390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_15")]; tensor x_329_cast_fp16 = reshape(shape = var_5395, x = var_5390_cast_fp16)[name = string("x_329_cast_fp16")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor var_5402_to_fp16 = const()[name = string("op_5402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549104320)))]; tensor hidden_states_111_cast_fp16 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = var_5402_to_fp16, x = x_329_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_323_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("x_331_cast_fp16")]; int32 var_5414 = const()[name = string("op_5414"), val = int32(1)]; fp16 const_193_promoted_to_fp16 = const()[name = string("const_193_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5417_cast_fp16 = mul(x = x_331_cast_fp16, y = const_193_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool x_333_interleave_0 = const()[name = string("x_333_interleave_0"), val = bool(false)]; tensor x_333_cast_fp16 = concat(axis = var_5414, interleave = x_333_interleave_0, values = (x_331_cast_fp16, var_5417_cast_fp16))[name = string("x_333_cast_fp16")]; tensor out_223_axes_0 = const()[name = string("out_223_axes_0"), val = tensor([1])]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_223_cast_fp16 = layer_norm(axes = out_223_axes_0, epsilon = var_5427_to_fp16, x = x_333_cast_fp16)[name = string("out_223_cast_fp16")]; tensor layer_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551201536)))]; tensor out_225_cast_fp16 = mul(x = out_223_cast_fp16, y = layer_layers_18_post_attention_layernorm_weight_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(1)]; tensor var_5433_cast_fp16_0, tensor var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = out_225_cast_fp16)[name = string("op_5433_cast_fp16")]; string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551205696)))]; tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = var_5438_to_fp16, x = var_5433_cast_fp16_0)[name = string("input_37_cast_fp16")]; tensor var_5449_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_5449_cast_fp16")]; string var_5454_pad_type_0 = const()[name = string("op_5454_pad_type_0"), val = string("valid")]; tensor var_5454_strides_0 = const()[name = string("op_5454_strides_0"), val = tensor([1, 1])]; tensor var_5454_pad_0 = const()[name = string("op_5454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_dilations_0 = const()[name = string("op_5454_dilations_0"), val = tensor([1, 1])]; int32 var_5454_groups_0 = const()[name = string("op_5454_groups_0"), val = int32(1)]; tensor var_5437_to_fp16 = const()[name = string("op_5437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559594368)))]; tensor var_5454_cast_fp16 = conv(dilations = var_5454_dilations_0, groups = var_5454_groups_0, pad = var_5454_pad_0, pad_type = var_5454_pad_type_0, strides = var_5454_strides_0, weight = var_5437_to_fp16, x = var_5433_cast_fp16_0)[name = string("op_5454_cast_fp16")]; tensor x_339_cast_fp16 = mul(x = var_5449_cast_fp16, y = var_5454_cast_fp16)[name = string("x_339_cast_fp16")]; string hidden_states_113_pad_type_0 = const()[name = string("hidden_states_113_pad_type_0"), val = string("valid")]; tensor hidden_states_113_strides_0 = const()[name = string("hidden_states_113_strides_0"), val = tensor([1, 1])]; tensor hidden_states_113_pad_0 = const()[name = string("hidden_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_113_dilations_0 = const()[name = string("hidden_states_113_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_113_groups_0 = const()[name = string("hidden_states_113_groups_0"), val = int32(1)]; tensor var_5436_to_fp16 = const()[name = string("op_5436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567983040)))]; tensor hidden_states_113_cast_fp16 = conv(dilations = hidden_states_113_dilations_0, groups = hidden_states_113_groups_0, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = hidden_states_113_strides_0, weight = var_5436_to_fp16, x = x_339_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor x_341_cast_fp16 = add(x = x_331_cast_fp16, y = hidden_states_113_cast_fp16)[name = string("x_341_cast_fp16")]; int32 var_5467 = const()[name = string("op_5467"), val = int32(1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5470_cast_fp16 = mul(x = x_341_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_5470_cast_fp16")]; bool x_343_interleave_0 = const()[name = string("x_343_interleave_0"), val = bool(false)]; tensor x_343_cast_fp16 = concat(axis = var_5467, interleave = x_343_interleave_0, values = (x_341_cast_fp16, var_5470_cast_fp16))[name = string("x_343_cast_fp16")]; tensor out_229_axes_0 = const()[name = string("out_229_axes_0"), val = tensor([1])]; fp16 var_5480_to_fp16 = const()[name = string("op_5480_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_229_cast_fp16 = layer_norm(axes = out_229_axes_0, epsilon = var_5480_to_fp16, x = x_343_cast_fp16)[name = string("out_229_cast_fp16")]; tensor layer_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576371712)))]; tensor out_231_cast_fp16 = mul(x = out_229_cast_fp16, y = layer_layers_19_input_layernorm_weight_to_fp16)[name = string("out_231_cast_fp16")]; tensor var_5486_split_sizes_0 = const()[name = string("op_5486_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5486_axis_0 = const()[name = string("op_5486_axis_0"), val = int32(1)]; tensor var_5486_cast_fp16_0, tensor var_5486_cast_fp16_1 = split(axis = var_5486_axis_0, split_sizes = var_5486_split_sizes_0, x = out_231_cast_fp16)[name = string("op_5486_cast_fp16")]; string query_states_77_pad_type_0 = const()[name = string("query_states_77_pad_type_0"), val = string("valid")]; tensor query_states_77_strides_0 = const()[name = string("query_states_77_strides_0"), val = tensor([1, 1])]; tensor query_states_77_pad_0 = const()[name = string("query_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_77_dilations_0 = const()[name = string("query_states_77_dilations_0"), val = tensor([1, 1])]; int32 query_states_77_groups_0 = const()[name = string("query_states_77_groups_0"), val = int32(1)]; tensor var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576375872)))]; tensor query_states_77_cast_fp16 = conv(dilations = query_states_77_dilations_0, groups = query_states_77_groups_0, pad = query_states_77_pad_0, pad_type = query_states_77_pad_type_0, strides = query_states_77_strides_0, weight = var_5508_to_fp16, x = var_5486_cast_fp16_0)[name = string("query_states_77_cast_fp16")]; string key_states_77_pad_type_0 = const()[name = string("key_states_77_pad_type_0"), val = string("valid")]; tensor key_states_77_strides_0 = const()[name = string("key_states_77_strides_0"), val = tensor([1, 1])]; tensor key_states_77_pad_0 = const()[name = string("key_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_77_dilations_0 = const()[name = string("key_states_77_dilations_0"), val = tensor([1, 1])]; int32 key_states_77_groups_0 = const()[name = string("key_states_77_groups_0"), val = int32(1)]; tensor var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578473088)))]; tensor key_states_77_cast_fp16 = conv(dilations = key_states_77_dilations_0, groups = key_states_77_groups_0, pad = key_states_77_pad_0, pad_type = key_states_77_pad_type_0, strides = key_states_77_strides_0, weight = var_5519_to_fp16, x = var_5486_cast_fp16_0)[name = string("key_states_77_cast_fp16")]; string value_states_77_pad_type_0 = const()[name = string("value_states_77_pad_type_0"), val = string("valid")]; tensor value_states_77_strides_0 = const()[name = string("value_states_77_strides_0"), val = tensor([1, 1])]; tensor value_states_77_pad_0 = const()[name = string("value_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_77_dilations_0 = const()[name = string("value_states_77_dilations_0"), val = tensor([1, 1])]; int32 value_states_77_groups_0 = const()[name = string("value_states_77_groups_0"), val = int32(1)]; tensor var_5530_to_fp16 = const()[name = string("op_5530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578735296)))]; tensor value_states_77_cast_fp16 = conv(dilations = value_states_77_dilations_0, groups = value_states_77_groups_0, pad = value_states_77_pad_0, pad_type = value_states_77_pad_type_0, strides = value_states_77_strides_0, weight = var_5530_to_fp16, x = var_5486_cast_fp16_0)[name = string("value_states_77_cast_fp16")]; tensor var_5538 = const()[name = string("op_5538"), val = tensor([1, 16, 64, 16])]; tensor embed_77_cast_fp16 = reshape(shape = var_5538, x = query_states_77_cast_fp16)[name = string("embed_77_cast_fp16")]; tensor var_5542 = const()[name = string("op_5542"), val = tensor([1, 2, 64, 16])]; tensor var_5543_cast_fp16 = reshape(shape = var_5542, x = key_states_77_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor embed_79_perm_0 = const()[name = string("embed_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([1, 2, 64, 16])]; tensor var_5550_cast_fp16 = reshape(shape = var_5549, x = value_states_77_cast_fp16)[name = string("op_5550_cast_fp16")]; tensor value_states_79_perm_0 = const()[name = string("value_states_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5554_cast_fp16 = mul(x = embed_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5554_cast_fp16")]; tensor var_5555_split_sizes_0 = const()[name = string("op_5555_split_sizes_0"), val = tensor([32, 32])]; int32 var_5555_axis_0 = const()[name = string("op_5555_axis_0"), val = int32(-2)]; tensor var_5555_cast_fp16_0, tensor var_5555_cast_fp16_1 = split(axis = var_5555_axis_0, split_sizes = var_5555_split_sizes_0, x = embed_77_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 const_197_promoted_to_fp16 = const()[name = string("const_197_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5557_cast_fp16 = mul(x = var_5555_cast_fp16_1, y = const_197_promoted_to_fp16)[name = string("op_5557_cast_fp16")]; int32 var_5559 = const()[name = string("op_5559"), val = int32(-2)]; bool var_5560_interleave_0 = const()[name = string("op_5560_interleave_0"), val = bool(false)]; tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5557_cast_fp16, var_5555_cast_fp16_0))[name = string("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = mul(x = var_5560_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5561_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_5554_cast_fp16, y = var_5561_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor embed_79_cast_fp16 = transpose(perm = embed_79_perm_0, x = var_5543_cast_fp16)[name = string("transpose_14")]; tensor var_5564_cast_fp16 = mul(x = embed_79_cast_fp16, y = cos_cast_fp16)[name = string("op_5564_cast_fp16")]; tensor var_5565_split_sizes_0 = const()[name = string("op_5565_split_sizes_0"), val = tensor([32, 32])]; int32 var_5565_axis_0 = const()[name = string("op_5565_axis_0"), val = int32(-1)]; tensor var_5565_cast_fp16_0, tensor var_5565_cast_fp16_1 = split(axis = var_5565_axis_0, split_sizes = var_5565_split_sizes_0, x = embed_79_cast_fp16)[name = string("op_5565_cast_fp16")]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5567_cast_fp16 = mul(x = var_5565_cast_fp16_1, y = const_198_promoted_to_fp16)[name = string("op_5567_cast_fp16")]; int32 var_5569 = const()[name = string("op_5569"), val = int32(-1)]; bool var_5570_interleave_0 = const()[name = string("op_5570_interleave_0"), val = bool(false)]; tensor var_5570_cast_fp16 = concat(axis = var_5569, interleave = var_5570_interleave_0, values = (var_5567_cast_fp16, var_5565_cast_fp16_0))[name = string("op_5570_cast_fp16")]; tensor var_5571_cast_fp16 = mul(x = var_5570_cast_fp16, y = sin_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor key_states_79_cast_fp16 = add(x = var_5564_cast_fp16, y = var_5571_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor expand_dims_191 = const()[name = string("expand_dims_191"), val = tensor([19])]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([20])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_191, expand_dims_192, position_id, concat_155_values3_0))[name = string("concat_155")]; tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (expand_dims_194, concat_156_values1_0, var_426, concat_156_values3_0))[name = string("concat_156")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = key_states_79_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_300_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_300")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_79_cast_fp16 = transpose(perm = value_states_79_perm_0, x = var_5550_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = value_states_79_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_301_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_301")]; tensor var_5614_begin_0 = const()[name = string("op_5614_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5614_end_0 = const()[name = string("op_5614_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5614_end_mask_0 = const()[name = string("op_5614_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5614_cast_fp16 = slice_by_index(begin = var_5614_begin_0, end = var_5614_end_0, end_mask = var_5614_end_mask_0, x = coreml_update_state_86)[name = string("op_5614_cast_fp16")]; tensor tile_38 = const()[name = string("tile_38"), val = tensor([1, 1])]; int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(1)]; tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = tile_38, x = var_5614_cast_fp16)[name = string("op_5617_cast_fp16")]; tensor var_5624_begin_0 = const()[name = string("op_5624_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5624_end_0 = const()[name = string("op_5624_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5624_end_mask_0 = const()[name = string("op_5624_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = coreml_update_state_87)[name = string("op_5624_cast_fp16")]; tensor tile_39 = const()[name = string("tile_39"), val = tensor([1, 1])]; int32 var_5627_axis_0 = const()[name = string("op_5627_axis_0"), val = int32(1)]; tensor var_5627_cast_fp16_0, tensor var_5627_cast_fp16_1 = split(axis = var_5627_axis_0, split_sizes = tile_39, x = var_5624_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5630_split_sizes_0 = const()[name = string("op_5630_split_sizes_0"), val = tensor([8, 8])]; int32 var_5630_axis_0 = const()[name = string("op_5630_axis_0"), val = int32(1)]; tensor var_5630_cast_fp16_0, tensor var_5630_cast_fp16_1 = split(axis = var_5630_axis_0, split_sizes = var_5630_split_sizes_0, x = query_states_79_cast_fp16)[name = string("op_5630_cast_fp16")]; bool attn_weights_305_transpose_x_0 = const()[name = string("attn_weights_305_transpose_x_0"), val = bool(false)]; bool attn_weights_305_transpose_y_0 = const()[name = string("attn_weights_305_transpose_y_0"), val = bool(false)]; tensor attn_weights_305_cast_fp16 = matmul(transpose_x = attn_weights_305_transpose_x_0, transpose_y = attn_weights_305_transpose_y_0, x = var_5617_cast_fp16_0, y = var_5630_cast_fp16_0)[name = string("attn_weights_305_cast_fp16")]; fp16 _inversed_attn_weights_307_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_307_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_307_cast_fp16 = mul(x = attn_weights_305_cast_fp16, y = _inversed_attn_weights_307_y_0_to_fp16)[name = string("_inversed_attn_weights_307_cast_fp16")]; tensor attn_weights_309_cast_fp16 = add(x = _inversed_attn_weights_307_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_309_cast_fp16")]; int32 var_5637 = const()[name = string("op_5637"), val = int32(2)]; tensor attn_weights_311_cast_fp16 = softmax(axis = var_5637, x = attn_weights_309_cast_fp16)[name = string("attn_weights_311_cast_fp16")]; bool var_5643_transpose_x_1 = const()[name = string("op_5643_transpose_x_1"), val = bool(true)]; bool var_5643_transpose_y_1 = const()[name = string("op_5643_transpose_y_1"), val = bool(false)]; tensor var_5643_cast_fp16 = matmul(transpose_x = var_5643_transpose_x_1, transpose_y = var_5643_transpose_y_1, x = attn_weights_311_cast_fp16, y = var_5627_cast_fp16_0)[name = string("op_5643_cast_fp16")]; bool attn_weights_313_transpose_x_0 = const()[name = string("attn_weights_313_transpose_x_0"), val = bool(false)]; bool attn_weights_313_transpose_y_0 = const()[name = string("attn_weights_313_transpose_y_0"), val = bool(false)]; tensor attn_weights_313_cast_fp16 = matmul(transpose_x = attn_weights_313_transpose_x_0, transpose_y = attn_weights_313_transpose_y_0, x = var_5617_cast_fp16_1, y = var_5630_cast_fp16_1)[name = string("attn_weights_313_cast_fp16")]; fp16 _inversed_attn_weights_315_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_315_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_315_cast_fp16 = mul(x = attn_weights_313_cast_fp16, y = _inversed_attn_weights_315_y_0_to_fp16)[name = string("_inversed_attn_weights_315_cast_fp16")]; tensor attn_weights_317_cast_fp16 = add(x = _inversed_attn_weights_315_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_317_cast_fp16")]; int32 var_5649 = const()[name = string("op_5649"), val = int32(2)]; tensor attn_weights_319_cast_fp16 = softmax(axis = var_5649, x = attn_weights_317_cast_fp16)[name = string("attn_weights_319_cast_fp16")]; bool attn_output_115_transpose_x_1 = const()[name = string("attn_output_115_transpose_x_1"), val = bool(true)]; bool attn_output_115_transpose_y_1 = const()[name = string("attn_output_115_transpose_y_1"), val = bool(false)]; tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_1, transpose_y = attn_output_115_transpose_y_1, x = attn_weights_319_cast_fp16, y = var_5627_cast_fp16_1)[name = string("attn_output_115_cast_fp16")]; int32 var_5657 = const()[name = string("op_5657"), val = int32(1)]; bool attn_output_117_interleave_0 = const()[name = string("attn_output_117_interleave_0"), val = bool(false)]; tensor attn_output_117_cast_fp16 = concat(axis = var_5657, interleave = attn_output_117_interleave_0, values = (var_5643_cast_fp16, attn_output_115_cast_fp16))[name = string("attn_output_117_cast_fp16")]; tensor var_5661_perm_0 = const()[name = string("op_5661_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 1024, 1, 16])]; tensor var_5661_cast_fp16 = transpose(perm = var_5661_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_12")]; tensor x_347_cast_fp16 = reshape(shape = var_5666, x = var_5661_cast_fp16)[name = string("x_347_cast_fp16")]; string hidden_states_117_pad_type_0 = const()[name = string("hidden_states_117_pad_type_0"), val = string("valid")]; tensor hidden_states_117_strides_0 = const()[name = string("hidden_states_117_strides_0"), val = tensor([1, 1])]; tensor hidden_states_117_pad_0 = const()[name = string("hidden_states_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_117_dilations_0 = const()[name = string("hidden_states_117_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_117_groups_0 = const()[name = string("hidden_states_117_groups_0"), val = int32(1)]; tensor var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578997504)))]; tensor hidden_states_117_cast_fp16 = conv(dilations = hidden_states_117_dilations_0, groups = hidden_states_117_groups_0, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = hidden_states_117_strides_0, weight = var_5673_to_fp16, x = x_347_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor x_349_cast_fp16 = add(x = x_341_cast_fp16, y = hidden_states_117_cast_fp16)[name = string("x_349_cast_fp16")]; int32 var_5685 = const()[name = string("op_5685"), val = int32(1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5688_cast_fp16 = mul(x = x_349_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5688_cast_fp16")]; bool x_351_interleave_0 = const()[name = string("x_351_interleave_0"), val = bool(false)]; tensor x_351_cast_fp16 = concat(axis = var_5685, interleave = x_351_interleave_0, values = (x_349_cast_fp16, var_5688_cast_fp16))[name = string("x_351_cast_fp16")]; tensor out_235_axes_0 = const()[name = string("out_235_axes_0"), val = tensor([1])]; fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_235_cast_fp16 = layer_norm(axes = out_235_axes_0, epsilon = var_5698_to_fp16, x = x_351_cast_fp16)[name = string("out_235_cast_fp16")]; tensor layer_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581094720)))]; tensor out_237_cast_fp16 = mul(x = out_235_cast_fp16, y = layer_layers_19_post_attention_layernorm_weight_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(1)]; tensor var_5704_cast_fp16_0, tensor var_5704_cast_fp16_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = out_237_cast_fp16)[name = string("op_5704_cast_fp16")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581098880)))]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = var_5709_to_fp16, x = var_5704_cast_fp16_0)[name = string("input_39_cast_fp16")]; tensor var_5720_cast_fp16 = silu(x = input_39_cast_fp16)[name = string("op_5720_cast_fp16")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5708_to_fp16 = const()[name = string("op_5708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589487552)))]; tensor var_5725_cast_fp16 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = var_5708_to_fp16, x = var_5704_cast_fp16_0)[name = string("op_5725_cast_fp16")]; tensor x_357_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5725_cast_fp16)[name = string("x_357_cast_fp16")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor var_5707_to_fp16 = const()[name = string("op_5707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597876224)))]; tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = var_5707_to_fp16, x = x_357_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor x_359_cast_fp16 = add(x = x_349_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("x_359_cast_fp16")]; int32 var_5738 = const()[name = string("op_5738"), val = int32(1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5741_cast_fp16 = mul(x = x_359_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; bool x_361_interleave_0 = const()[name = string("x_361_interleave_0"), val = bool(false)]; tensor x_361_cast_fp16 = concat(axis = var_5738, interleave = x_361_interleave_0, values = (x_359_cast_fp16, var_5741_cast_fp16))[name = string("x_361_cast_fp16")]; tensor out_241_axes_0 = const()[name = string("out_241_axes_0"), val = tensor([1])]; fp16 var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_241_cast_fp16 = layer_norm(axes = out_241_axes_0, epsilon = var_5751_to_fp16, x = x_361_cast_fp16)[name = string("out_241_cast_fp16")]; tensor layer_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606264896)))]; tensor out_243_cast_fp16 = mul(x = out_241_cast_fp16, y = layer_layers_20_input_layernorm_weight_to_fp16)[name = string("out_243_cast_fp16")]; tensor var_5757_split_sizes_0 = const()[name = string("op_5757_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5757_axis_0 = const()[name = string("op_5757_axis_0"), val = int32(1)]; tensor var_5757_cast_fp16_0, tensor var_5757_cast_fp16_1 = split(axis = var_5757_axis_0, split_sizes = var_5757_split_sizes_0, x = out_243_cast_fp16)[name = string("op_5757_cast_fp16")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor var_5779_to_fp16 = const()[name = string("op_5779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606269056)))]; tensor query_states_81_cast_fp16 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = var_5779_to_fp16, x = var_5757_cast_fp16_0)[name = string("query_states_81_cast_fp16")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor var_5790_to_fp16 = const()[name = string("op_5790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608366272)))]; tensor key_states_81_cast_fp16 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = var_5790_to_fp16, x = var_5757_cast_fp16_0)[name = string("key_states_81_cast_fp16")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608628480)))]; tensor value_states_81_cast_fp16 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = var_5801_to_fp16, x = var_5757_cast_fp16_0)[name = string("value_states_81_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 16, 64, 16])]; tensor embed_81_cast_fp16 = reshape(shape = var_5809, x = query_states_81_cast_fp16)[name = string("embed_81_cast_fp16")]; tensor var_5813 = const()[name = string("op_5813"), val = tensor([1, 2, 64, 16])]; tensor var_5814_cast_fp16 = reshape(shape = var_5813, x = key_states_81_cast_fp16)[name = string("op_5814_cast_fp16")]; tensor embed_83_perm_0 = const()[name = string("embed_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, 2, 64, 16])]; tensor var_5821_cast_fp16 = reshape(shape = var_5820, x = value_states_81_cast_fp16)[name = string("op_5821_cast_fp16")]; tensor value_states_83_perm_0 = const()[name = string("value_states_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5825_cast_fp16 = mul(x = embed_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5825_cast_fp16")]; tensor var_5826_split_sizes_0 = const()[name = string("op_5826_split_sizes_0"), val = tensor([32, 32])]; int32 var_5826_axis_0 = const()[name = string("op_5826_axis_0"), val = int32(-2)]; tensor var_5826_cast_fp16_0, tensor var_5826_cast_fp16_1 = split(axis = var_5826_axis_0, split_sizes = var_5826_split_sizes_0, x = embed_81_cast_fp16)[name = string("op_5826_cast_fp16")]; fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5828_cast_fp16 = mul(x = var_5826_cast_fp16_1, y = const_207_promoted_to_fp16)[name = string("op_5828_cast_fp16")]; int32 var_5830 = const()[name = string("op_5830"), val = int32(-2)]; bool var_5831_interleave_0 = const()[name = string("op_5831_interleave_0"), val = bool(false)]; tensor var_5831_cast_fp16 = concat(axis = var_5830, interleave = var_5831_interleave_0, values = (var_5828_cast_fp16, var_5826_cast_fp16_0))[name = string("op_5831_cast_fp16")]; tensor var_5832_cast_fp16 = mul(x = var_5831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5832_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_5825_cast_fp16, y = var_5832_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor embed_83_cast_fp16 = transpose(perm = embed_83_perm_0, x = var_5814_cast_fp16)[name = string("transpose_11")]; tensor var_5835_cast_fp16 = mul(x = embed_83_cast_fp16, y = cos_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5836_split_sizes_0 = const()[name = string("op_5836_split_sizes_0"), val = tensor([32, 32])]; int32 var_5836_axis_0 = const()[name = string("op_5836_axis_0"), val = int32(-1)]; tensor var_5836_cast_fp16_0, tensor var_5836_cast_fp16_1 = split(axis = var_5836_axis_0, split_sizes = var_5836_split_sizes_0, x = embed_83_cast_fp16)[name = string("op_5836_cast_fp16")]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5838_cast_fp16 = mul(x = var_5836_cast_fp16_1, y = const_208_promoted_to_fp16)[name = string("op_5838_cast_fp16")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841_cast_fp16 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838_cast_fp16, var_5836_cast_fp16_0))[name = string("op_5841_cast_fp16")]; tensor var_5842_cast_fp16 = mul(x = var_5841_cast_fp16, y = sin_cast_fp16)[name = string("op_5842_cast_fp16")]; tensor key_states_83_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5842_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([20])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([0])]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([21])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_201, expand_dims_202, position_id, concat_163_values3_0))[name = string("concat_163")]; tensor concat_164_values1_0 = const()[name = string("concat_164_values1_0"), val = tensor([0])]; tensor concat_164_values3_0 = const()[name = string("concat_164_values3_0"), val = tensor([0])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_204, concat_164_values1_0, var_426, concat_164_values3_0))[name = string("concat_164")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_302_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_302")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83_cast_fp16 = transpose(perm = value_states_83_perm_0, x = var_5821_cast_fp16)[name = string("transpose_10")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = value_states_83_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_303_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_303")]; tensor var_5885_begin_0 = const()[name = string("op_5885_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5885_end_0 = const()[name = string("op_5885_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5885_end_mask_0 = const()[name = string("op_5885_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5885_cast_fp16 = slice_by_index(begin = var_5885_begin_0, end = var_5885_end_0, end_mask = var_5885_end_mask_0, x = coreml_update_state_88)[name = string("op_5885_cast_fp16")]; tensor tile_40 = const()[name = string("tile_40"), val = tensor([1, 1])]; int32 var_5888_axis_0 = const()[name = string("op_5888_axis_0"), val = int32(1)]; tensor var_5888_cast_fp16_0, tensor var_5888_cast_fp16_1 = split(axis = var_5888_axis_0, split_sizes = tile_40, x = var_5885_cast_fp16)[name = string("op_5888_cast_fp16")]; tensor var_5895_begin_0 = const()[name = string("op_5895_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5895_end_0 = const()[name = string("op_5895_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5895_end_mask_0 = const()[name = string("op_5895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = coreml_update_state_89)[name = string("op_5895_cast_fp16")]; tensor tile_41 = const()[name = string("tile_41"), val = tensor([1, 1])]; int32 var_5898_axis_0 = const()[name = string("op_5898_axis_0"), val = int32(1)]; tensor var_5898_cast_fp16_0, tensor var_5898_cast_fp16_1 = split(axis = var_5898_axis_0, split_sizes = tile_41, x = var_5895_cast_fp16)[name = string("op_5898_cast_fp16")]; tensor var_5901_split_sizes_0 = const()[name = string("op_5901_split_sizes_0"), val = tensor([8, 8])]; int32 var_5901_axis_0 = const()[name = string("op_5901_axis_0"), val = int32(1)]; tensor var_5901_cast_fp16_0, tensor var_5901_cast_fp16_1 = split(axis = var_5901_axis_0, split_sizes = var_5901_split_sizes_0, x = query_states_83_cast_fp16)[name = string("op_5901_cast_fp16")]; bool attn_weights_321_transpose_x_0 = const()[name = string("attn_weights_321_transpose_x_0"), val = bool(false)]; bool attn_weights_321_transpose_y_0 = const()[name = string("attn_weights_321_transpose_y_0"), val = bool(false)]; tensor attn_weights_321_cast_fp16 = matmul(transpose_x = attn_weights_321_transpose_x_0, transpose_y = attn_weights_321_transpose_y_0, x = var_5888_cast_fp16_0, y = var_5901_cast_fp16_0)[name = string("attn_weights_321_cast_fp16")]; fp16 _inversed_attn_weights_323_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_323_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_323_cast_fp16 = mul(x = attn_weights_321_cast_fp16, y = _inversed_attn_weights_323_y_0_to_fp16)[name = string("_inversed_attn_weights_323_cast_fp16")]; tensor attn_weights_325_cast_fp16 = add(x = _inversed_attn_weights_323_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_325_cast_fp16")]; int32 var_5908 = const()[name = string("op_5908"), val = int32(2)]; tensor attn_weights_327_cast_fp16 = softmax(axis = var_5908, x = attn_weights_325_cast_fp16)[name = string("attn_weights_327_cast_fp16")]; bool var_5914_transpose_x_1 = const()[name = string("op_5914_transpose_x_1"), val = bool(true)]; bool var_5914_transpose_y_1 = const()[name = string("op_5914_transpose_y_1"), val = bool(false)]; tensor var_5914_cast_fp16 = matmul(transpose_x = var_5914_transpose_x_1, transpose_y = var_5914_transpose_y_1, x = attn_weights_327_cast_fp16, y = var_5898_cast_fp16_0)[name = string("op_5914_cast_fp16")]; bool attn_weights_329_transpose_x_0 = const()[name = string("attn_weights_329_transpose_x_0"), val = bool(false)]; bool attn_weights_329_transpose_y_0 = const()[name = string("attn_weights_329_transpose_y_0"), val = bool(false)]; tensor attn_weights_329_cast_fp16 = matmul(transpose_x = attn_weights_329_transpose_x_0, transpose_y = attn_weights_329_transpose_y_0, x = var_5888_cast_fp16_1, y = var_5901_cast_fp16_1)[name = string("attn_weights_329_cast_fp16")]; fp16 _inversed_attn_weights_331_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_331_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_331_cast_fp16 = mul(x = attn_weights_329_cast_fp16, y = _inversed_attn_weights_331_y_0_to_fp16)[name = string("_inversed_attn_weights_331_cast_fp16")]; tensor attn_weights_333_cast_fp16 = add(x = _inversed_attn_weights_331_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_333_cast_fp16")]; int32 var_5920 = const()[name = string("op_5920"), val = int32(2)]; tensor attn_weights_335_cast_fp16 = softmax(axis = var_5920, x = attn_weights_333_cast_fp16)[name = string("attn_weights_335_cast_fp16")]; bool attn_output_121_transpose_x_1 = const()[name = string("attn_output_121_transpose_x_1"), val = bool(true)]; bool attn_output_121_transpose_y_1 = const()[name = string("attn_output_121_transpose_y_1"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_1, transpose_y = attn_output_121_transpose_y_1, x = attn_weights_335_cast_fp16, y = var_5898_cast_fp16_1)[name = string("attn_output_121_cast_fp16")]; int32 var_5928 = const()[name = string("op_5928"), val = int32(1)]; bool attn_output_123_interleave_0 = const()[name = string("attn_output_123_interleave_0"), val = bool(false)]; tensor attn_output_123_cast_fp16 = concat(axis = var_5928, interleave = attn_output_123_interleave_0, values = (var_5914_cast_fp16, attn_output_121_cast_fp16))[name = string("attn_output_123_cast_fp16")]; tensor var_5932_perm_0 = const()[name = string("op_5932_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5937 = const()[name = string("op_5937"), val = tensor([1, 1024, 1, 16])]; tensor var_5932_cast_fp16 = transpose(perm = var_5932_perm_0, x = attn_output_123_cast_fp16)[name = string("transpose_9")]; tensor x_365_cast_fp16 = reshape(shape = var_5937, x = var_5932_cast_fp16)[name = string("x_365_cast_fp16")]; string hidden_states_123_pad_type_0 = const()[name = string("hidden_states_123_pad_type_0"), val = string("valid")]; tensor hidden_states_123_strides_0 = const()[name = string("hidden_states_123_strides_0"), val = tensor([1, 1])]; tensor hidden_states_123_pad_0 = const()[name = string("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_123_dilations_0 = const()[name = string("hidden_states_123_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_123_groups_0 = const()[name = string("hidden_states_123_groups_0"), val = int32(1)]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608890688)))]; tensor hidden_states_123_cast_fp16 = conv(dilations = hidden_states_123_dilations_0, groups = hidden_states_123_groups_0, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = hidden_states_123_strides_0, weight = var_5944_to_fp16, x = x_365_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_359_cast_fp16, y = hidden_states_123_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(1)]; fp16 const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5959_cast_fp16 = mul(x = x_367_cast_fp16, y = const_213_promoted_to_fp16)[name = string("op_5959_cast_fp16")]; bool x_369_interleave_0 = const()[name = string("x_369_interleave_0"), val = bool(false)]; tensor x_369_cast_fp16 = concat(axis = var_5956, interleave = x_369_interleave_0, values = (x_367_cast_fp16, var_5959_cast_fp16))[name = string("x_369_cast_fp16")]; tensor out_247_axes_0 = const()[name = string("out_247_axes_0"), val = tensor([1])]; fp16 var_5969_to_fp16 = const()[name = string("op_5969_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_247_cast_fp16 = layer_norm(axes = out_247_axes_0, epsilon = var_5969_to_fp16, x = x_369_cast_fp16)[name = string("out_247_cast_fp16")]; tensor layer_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610987904)))]; tensor out_249_cast_fp16 = mul(x = out_247_cast_fp16, y = layer_layers_20_post_attention_layernorm_weight_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5975_split_sizes_0 = const()[name = string("op_5975_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5975_axis_0 = const()[name = string("op_5975_axis_0"), val = int32(1)]; tensor var_5975_cast_fp16_0, tensor var_5975_cast_fp16_1 = split(axis = var_5975_axis_0, split_sizes = var_5975_split_sizes_0, x = out_249_cast_fp16)[name = string("op_5975_cast_fp16")]; string input_41_pad_type_0 = const()[name = string("input_41_pad_type_0"), val = string("valid")]; tensor input_41_strides_0 = const()[name = string("input_41_strides_0"), val = tensor([1, 1])]; tensor input_41_pad_0 = const()[name = string("input_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_41_dilations_0 = const()[name = string("input_41_dilations_0"), val = tensor([1, 1])]; int32 input_41_groups_0 = const()[name = string("input_41_groups_0"), val = int32(1)]; tensor var_5980_to_fp16 = const()[name = string("op_5980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610992064)))]; tensor input_41_cast_fp16 = conv(dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = var_5980_to_fp16, x = var_5975_cast_fp16_0)[name = string("input_41_cast_fp16")]; tensor var_5991_cast_fp16 = silu(x = input_41_cast_fp16)[name = string("op_5991_cast_fp16")]; string var_5996_pad_type_0 = const()[name = string("op_5996_pad_type_0"), val = string("valid")]; tensor var_5996_strides_0 = const()[name = string("op_5996_strides_0"), val = tensor([1, 1])]; tensor var_5996_pad_0 = const()[name = string("op_5996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5996_dilations_0 = const()[name = string("op_5996_dilations_0"), val = tensor([1, 1])]; int32 var_5996_groups_0 = const()[name = string("op_5996_groups_0"), val = int32(1)]; tensor var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619380736)))]; tensor var_5996_cast_fp16 = conv(dilations = var_5996_dilations_0, groups = var_5996_groups_0, pad = var_5996_pad_0, pad_type = var_5996_pad_type_0, strides = var_5996_strides_0, weight = var_5979_to_fp16, x = var_5975_cast_fp16_0)[name = string("op_5996_cast_fp16")]; tensor x_375_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5996_cast_fp16)[name = string("x_375_cast_fp16")]; string hidden_states_125_pad_type_0 = const()[name = string("hidden_states_125_pad_type_0"), val = string("valid")]; tensor hidden_states_125_strides_0 = const()[name = string("hidden_states_125_strides_0"), val = tensor([1, 1])]; tensor hidden_states_125_pad_0 = const()[name = string("hidden_states_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_125_dilations_0 = const()[name = string("hidden_states_125_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_125_groups_0 = const()[name = string("hidden_states_125_groups_0"), val = int32(1)]; tensor var_5978_to_fp16 = const()[name = string("op_5978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627769408)))]; tensor hidden_states_125_cast_fp16 = conv(dilations = hidden_states_125_dilations_0, groups = hidden_states_125_groups_0, pad = hidden_states_125_pad_0, pad_type = hidden_states_125_pad_type_0, strides = hidden_states_125_strides_0, weight = var_5978_to_fp16, x = x_375_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = hidden_states_125_cast_fp16)[name = string("x_377_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6012_cast_fp16 = mul(x = x_377_cast_fp16, y = const_214_promoted_to_fp16)[name = string("op_6012_cast_fp16")]; bool x_379_interleave_0 = const()[name = string("x_379_interleave_0"), val = bool(false)]; tensor x_379_cast_fp16 = concat(axis = var_6009, interleave = x_379_interleave_0, values = (x_377_cast_fp16, var_6012_cast_fp16))[name = string("x_379_cast_fp16")]; tensor out_253_axes_0 = const()[name = string("out_253_axes_0"), val = tensor([1])]; fp16 var_6022_to_fp16 = const()[name = string("op_6022_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_253_cast_fp16 = layer_norm(axes = out_253_axes_0, epsilon = var_6022_to_fp16, x = x_379_cast_fp16)[name = string("out_253_cast_fp16")]; tensor layer_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636158080)))]; tensor out_255_cast_fp16 = mul(x = out_253_cast_fp16, y = layer_layers_21_input_layernorm_weight_to_fp16)[name = string("out_255_cast_fp16")]; tensor var_6028_split_sizes_0 = const()[name = string("op_6028_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6028_axis_0 = const()[name = string("op_6028_axis_0"), val = int32(1)]; tensor var_6028_cast_fp16_0, tensor var_6028_cast_fp16_1 = split(axis = var_6028_axis_0, split_sizes = var_6028_split_sizes_0, x = out_255_cast_fp16)[name = string("op_6028_cast_fp16")]; string query_states_85_pad_type_0 = const()[name = string("query_states_85_pad_type_0"), val = string("valid")]; tensor query_states_85_strides_0 = const()[name = string("query_states_85_strides_0"), val = tensor([1, 1])]; tensor query_states_85_pad_0 = const()[name = string("query_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_85_dilations_0 = const()[name = string("query_states_85_dilations_0"), val = tensor([1, 1])]; int32 query_states_85_groups_0 = const()[name = string("query_states_85_groups_0"), val = int32(1)]; tensor var_6050_to_fp16 = const()[name = string("op_6050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636162240)))]; tensor query_states_85_cast_fp16 = conv(dilations = query_states_85_dilations_0, groups = query_states_85_groups_0, pad = query_states_85_pad_0, pad_type = query_states_85_pad_type_0, strides = query_states_85_strides_0, weight = var_6050_to_fp16, x = var_6028_cast_fp16_0)[name = string("query_states_85_cast_fp16")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor var_6061_to_fp16 = const()[name = string("op_6061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638259456)))]; tensor key_states_85_cast_fp16 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = var_6061_to_fp16, x = var_6028_cast_fp16_0)[name = string("key_states_85_cast_fp16")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor var_6072_to_fp16 = const()[name = string("op_6072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638521664)))]; tensor value_states_85_cast_fp16 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = var_6072_to_fp16, x = var_6028_cast_fp16_0)[name = string("value_states_85_cast_fp16")]; tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 16, 64, 16])]; tensor embed_85_cast_fp16 = reshape(shape = var_6080, x = query_states_85_cast_fp16)[name = string("embed_85_cast_fp16")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([1, 2, 64, 16])]; tensor var_6085_cast_fp16 = reshape(shape = var_6084, x = key_states_85_cast_fp16)[name = string("op_6085_cast_fp16")]; tensor embed_87_perm_0 = const()[name = string("embed_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6091 = const()[name = string("op_6091"), val = tensor([1, 2, 64, 16])]; tensor var_6092_cast_fp16 = reshape(shape = var_6091, x = value_states_85_cast_fp16)[name = string("op_6092_cast_fp16")]; tensor value_states_87_perm_0 = const()[name = string("value_states_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6096_cast_fp16 = mul(x = embed_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6097_split_sizes_0 = const()[name = string("op_6097_split_sizes_0"), val = tensor([32, 32])]; int32 var_6097_axis_0 = const()[name = string("op_6097_axis_0"), val = int32(-2)]; tensor var_6097_cast_fp16_0, tensor var_6097_cast_fp16_1 = split(axis = var_6097_axis_0, split_sizes = var_6097_split_sizes_0, x = embed_85_cast_fp16)[name = string("op_6097_cast_fp16")]; fp16 const_217_promoted_to_fp16 = const()[name = string("const_217_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6099_cast_fp16 = mul(x = var_6097_cast_fp16_1, y = const_217_promoted_to_fp16)[name = string("op_6099_cast_fp16")]; int32 var_6101 = const()[name = string("op_6101"), val = int32(-2)]; bool var_6102_interleave_0 = const()[name = string("op_6102_interleave_0"), val = bool(false)]; tensor var_6102_cast_fp16 = concat(axis = var_6101, interleave = var_6102_interleave_0, values = (var_6099_cast_fp16, var_6097_cast_fp16_0))[name = string("op_6102_cast_fp16")]; tensor var_6103_cast_fp16 = mul(x = var_6102_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6103_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_6096_cast_fp16, y = var_6103_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor embed_87_cast_fp16 = transpose(perm = embed_87_perm_0, x = var_6085_cast_fp16)[name = string("transpose_8")]; tensor var_6106_cast_fp16 = mul(x = embed_87_cast_fp16, y = cos_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6107_split_sizes_0 = const()[name = string("op_6107_split_sizes_0"), val = tensor([32, 32])]; int32 var_6107_axis_0 = const()[name = string("op_6107_axis_0"), val = int32(-1)]; tensor var_6107_cast_fp16_0, tensor var_6107_cast_fp16_1 = split(axis = var_6107_axis_0, split_sizes = var_6107_split_sizes_0, x = embed_87_cast_fp16)[name = string("op_6107_cast_fp16")]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6109_cast_fp16 = mul(x = var_6107_cast_fp16_1, y = const_218_promoted_to_fp16)[name = string("op_6109_cast_fp16")]; int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; bool var_6112_interleave_0 = const()[name = string("op_6112_interleave_0"), val = bool(false)]; tensor var_6112_cast_fp16 = concat(axis = var_6111, interleave = var_6112_interleave_0, values = (var_6109_cast_fp16, var_6107_cast_fp16_0))[name = string("op_6112_cast_fp16")]; tensor var_6113_cast_fp16 = mul(x = var_6112_cast_fp16, y = sin_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor key_states_87_cast_fp16 = add(x = var_6106_cast_fp16, y = var_6113_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([21])]; tensor expand_dims_212 = const()[name = string("expand_dims_212"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([22])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_211, expand_dims_212, position_id, concat_171_values3_0))[name = string("concat_171")]; tensor concat_172_values1_0 = const()[name = string("concat_172_values1_0"), val = tensor([0])]; tensor concat_172_values3_0 = const()[name = string("concat_172_values3_0"), val = tensor([0])]; int32 concat_172_axis_0 = const()[name = string("concat_172_axis_0"), val = int32(0)]; bool concat_172_interleave_0 = const()[name = string("concat_172_interleave_0"), val = bool(false)]; tensor concat_172 = concat(axis = concat_172_axis_0, interleave = concat_172_interleave_0, values = (expand_dims_214, concat_172_values1_0, var_426, concat_172_values3_0))[name = string("concat_172")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = key_states_87_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_304_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_304")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87_cast_fp16 = transpose(perm = value_states_87_perm_0, x = var_6092_cast_fp16)[name = string("transpose_7")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = value_states_87_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_305_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_305")]; tensor var_6156_begin_0 = const()[name = string("op_6156_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6156_end_0 = const()[name = string("op_6156_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6156_end_mask_0 = const()[name = string("op_6156_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6156_cast_fp16 = slice_by_index(begin = var_6156_begin_0, end = var_6156_end_0, end_mask = var_6156_end_mask_0, x = coreml_update_state_90)[name = string("op_6156_cast_fp16")]; tensor tile_42 = const()[name = string("tile_42"), val = tensor([1, 1])]; int32 var_6159_axis_0 = const()[name = string("op_6159_axis_0"), val = int32(1)]; tensor var_6159_cast_fp16_0, tensor var_6159_cast_fp16_1 = split(axis = var_6159_axis_0, split_sizes = tile_42, x = var_6156_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor var_6166_begin_0 = const()[name = string("op_6166_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6166_end_0 = const()[name = string("op_6166_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6166_end_mask_0 = const()[name = string("op_6166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6166_cast_fp16 = slice_by_index(begin = var_6166_begin_0, end = var_6166_end_0, end_mask = var_6166_end_mask_0, x = coreml_update_state_91)[name = string("op_6166_cast_fp16")]; tensor tile_43 = const()[name = string("tile_43"), val = tensor([1, 1])]; int32 var_6169_axis_0 = const()[name = string("op_6169_axis_0"), val = int32(1)]; tensor var_6169_cast_fp16_0, tensor var_6169_cast_fp16_1 = split(axis = var_6169_axis_0, split_sizes = tile_43, x = var_6166_cast_fp16)[name = string("op_6169_cast_fp16")]; tensor var_6172_split_sizes_0 = const()[name = string("op_6172_split_sizes_0"), val = tensor([8, 8])]; int32 var_6172_axis_0 = const()[name = string("op_6172_axis_0"), val = int32(1)]; tensor var_6172_cast_fp16_0, tensor var_6172_cast_fp16_1 = split(axis = var_6172_axis_0, split_sizes = var_6172_split_sizes_0, x = query_states_87_cast_fp16)[name = string("op_6172_cast_fp16")]; bool attn_weights_337_transpose_x_0 = const()[name = string("attn_weights_337_transpose_x_0"), val = bool(false)]; bool attn_weights_337_transpose_y_0 = const()[name = string("attn_weights_337_transpose_y_0"), val = bool(false)]; tensor attn_weights_337_cast_fp16 = matmul(transpose_x = attn_weights_337_transpose_x_0, transpose_y = attn_weights_337_transpose_y_0, x = var_6159_cast_fp16_0, y = var_6172_cast_fp16_0)[name = string("attn_weights_337_cast_fp16")]; fp16 _inversed_attn_weights_339_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_339_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_339_cast_fp16 = mul(x = attn_weights_337_cast_fp16, y = _inversed_attn_weights_339_y_0_to_fp16)[name = string("_inversed_attn_weights_339_cast_fp16")]; tensor attn_weights_341_cast_fp16 = add(x = _inversed_attn_weights_339_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_341_cast_fp16")]; int32 var_6179 = const()[name = string("op_6179"), val = int32(2)]; tensor attn_weights_343_cast_fp16 = softmax(axis = var_6179, x = attn_weights_341_cast_fp16)[name = string("attn_weights_343_cast_fp16")]; bool var_6185_transpose_x_1 = const()[name = string("op_6185_transpose_x_1"), val = bool(true)]; bool var_6185_transpose_y_1 = const()[name = string("op_6185_transpose_y_1"), val = bool(false)]; tensor var_6185_cast_fp16 = matmul(transpose_x = var_6185_transpose_x_1, transpose_y = var_6185_transpose_y_1, x = attn_weights_343_cast_fp16, y = var_6169_cast_fp16_0)[name = string("op_6185_cast_fp16")]; bool attn_weights_345_transpose_x_0 = const()[name = string("attn_weights_345_transpose_x_0"), val = bool(false)]; bool attn_weights_345_transpose_y_0 = const()[name = string("attn_weights_345_transpose_y_0"), val = bool(false)]; tensor attn_weights_345_cast_fp16 = matmul(transpose_x = attn_weights_345_transpose_x_0, transpose_y = attn_weights_345_transpose_y_0, x = var_6159_cast_fp16_1, y = var_6172_cast_fp16_1)[name = string("attn_weights_345_cast_fp16")]; fp16 _inversed_attn_weights_347_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_347_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_347_cast_fp16 = mul(x = attn_weights_345_cast_fp16, y = _inversed_attn_weights_347_y_0_to_fp16)[name = string("_inversed_attn_weights_347_cast_fp16")]; tensor attn_weights_349_cast_fp16 = add(x = _inversed_attn_weights_347_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_349_cast_fp16")]; int32 var_6191 = const()[name = string("op_6191"), val = int32(2)]; tensor attn_weights_351_cast_fp16 = softmax(axis = var_6191, x = attn_weights_349_cast_fp16)[name = string("attn_weights_351_cast_fp16")]; bool attn_output_127_transpose_x_1 = const()[name = string("attn_output_127_transpose_x_1"), val = bool(true)]; bool attn_output_127_transpose_y_1 = const()[name = string("attn_output_127_transpose_y_1"), val = bool(false)]; tensor attn_output_127_cast_fp16 = matmul(transpose_x = attn_output_127_transpose_x_1, transpose_y = attn_output_127_transpose_y_1, x = attn_weights_351_cast_fp16, y = var_6169_cast_fp16_1)[name = string("attn_output_127_cast_fp16")]; int32 var_6199 = const()[name = string("op_6199"), val = int32(1)]; bool attn_output_129_interleave_0 = const()[name = string("attn_output_129_interleave_0"), val = bool(false)]; tensor attn_output_129_cast_fp16 = concat(axis = var_6199, interleave = attn_output_129_interleave_0, values = (var_6185_cast_fp16, attn_output_127_cast_fp16))[name = string("attn_output_129_cast_fp16")]; tensor var_6203_perm_0 = const()[name = string("op_6203_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 1024, 1, 16])]; tensor var_6203_cast_fp16 = transpose(perm = var_6203_perm_0, x = attn_output_129_cast_fp16)[name = string("transpose_6")]; tensor x_383_cast_fp16 = reshape(shape = var_6208, x = var_6203_cast_fp16)[name = string("x_383_cast_fp16")]; string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638783872)))]; tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = var_6215_to_fp16, x = x_383_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("x_385_cast_fp16")]; int32 var_6227 = const()[name = string("op_6227"), val = int32(1)]; fp16 const_223_promoted_to_fp16 = const()[name = string("const_223_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6230_cast_fp16 = mul(x = x_385_cast_fp16, y = const_223_promoted_to_fp16)[name = string("op_6230_cast_fp16")]; bool x_387_interleave_0 = const()[name = string("x_387_interleave_0"), val = bool(false)]; tensor x_387_cast_fp16 = concat(axis = var_6227, interleave = x_387_interleave_0, values = (x_385_cast_fp16, var_6230_cast_fp16))[name = string("x_387_cast_fp16")]; tensor out_259_axes_0 = const()[name = string("out_259_axes_0"), val = tensor([1])]; fp16 var_6240_to_fp16 = const()[name = string("op_6240_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_259_cast_fp16 = layer_norm(axes = out_259_axes_0, epsilon = var_6240_to_fp16, x = x_387_cast_fp16)[name = string("out_259_cast_fp16")]; tensor layer_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640881088)))]; tensor out_261_cast_fp16 = mul(x = out_259_cast_fp16, y = layer_layers_21_post_attention_layernorm_weight_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_6246_split_sizes_0 = const()[name = string("op_6246_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6246_axis_0 = const()[name = string("op_6246_axis_0"), val = int32(1)]; tensor var_6246_cast_fp16_0, tensor var_6246_cast_fp16_1 = split(axis = var_6246_axis_0, split_sizes = var_6246_split_sizes_0, x = out_261_cast_fp16)[name = string("op_6246_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640885248)))]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = var_6251_to_fp16, x = var_6246_cast_fp16_0)[name = string("input_43_cast_fp16")]; tensor var_6262_cast_fp16 = silu(x = input_43_cast_fp16)[name = string("op_6262_cast_fp16")]; string var_6267_pad_type_0 = const()[name = string("op_6267_pad_type_0"), val = string("valid")]; tensor var_6267_strides_0 = const()[name = string("op_6267_strides_0"), val = tensor([1, 1])]; tensor var_6267_pad_0 = const()[name = string("op_6267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6267_dilations_0 = const()[name = string("op_6267_dilations_0"), val = tensor([1, 1])]; int32 var_6267_groups_0 = const()[name = string("op_6267_groups_0"), val = int32(1)]; tensor var_6250_to_fp16 = const()[name = string("op_6250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649273920)))]; tensor var_6267_cast_fp16 = conv(dilations = var_6267_dilations_0, groups = var_6267_groups_0, pad = var_6267_pad_0, pad_type = var_6267_pad_type_0, strides = var_6267_strides_0, weight = var_6250_to_fp16, x = var_6246_cast_fp16_0)[name = string("op_6267_cast_fp16")]; tensor x_393_cast_fp16 = mul(x = var_6262_cast_fp16, y = var_6267_cast_fp16)[name = string("x_393_cast_fp16")]; string hidden_states_131_pad_type_0 = const()[name = string("hidden_states_131_pad_type_0"), val = string("valid")]; tensor hidden_states_131_strides_0 = const()[name = string("hidden_states_131_strides_0"), val = tensor([1, 1])]; tensor hidden_states_131_pad_0 = const()[name = string("hidden_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_131_dilations_0 = const()[name = string("hidden_states_131_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_131_groups_0 = const()[name = string("hidden_states_131_groups_0"), val = int32(1)]; tensor var_6249_to_fp16 = const()[name = string("op_6249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657662592)))]; tensor hidden_states_131_cast_fp16 = conv(dilations = hidden_states_131_dilations_0, groups = hidden_states_131_groups_0, pad = hidden_states_131_pad_0, pad_type = hidden_states_131_pad_type_0, strides = hidden_states_131_strides_0, weight = var_6249_to_fp16, x = x_393_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor x_395_cast_fp16 = add(x = x_385_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("x_395_cast_fp16")]; int32 var_6280 = const()[name = string("op_6280"), val = int32(1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6283_cast_fp16 = mul(x = x_395_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_6283_cast_fp16")]; bool x_397_interleave_0 = const()[name = string("x_397_interleave_0"), val = bool(false)]; tensor x_397_cast_fp16 = concat(axis = var_6280, interleave = x_397_interleave_0, values = (x_395_cast_fp16, var_6283_cast_fp16))[name = string("x_397_cast_fp16")]; tensor out_265_axes_0 = const()[name = string("out_265_axes_0"), val = tensor([1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_265_cast_fp16 = layer_norm(axes = out_265_axes_0, epsilon = var_6293_to_fp16, x = x_397_cast_fp16)[name = string("out_265_cast_fp16")]; tensor layer_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666051264)))]; tensor out_267_cast_fp16 = mul(x = out_265_cast_fp16, y = layer_layers_22_input_layernorm_weight_to_fp16)[name = string("out_267_cast_fp16")]; tensor var_6299_split_sizes_0 = const()[name = string("op_6299_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6299_axis_0 = const()[name = string("op_6299_axis_0"), val = int32(1)]; tensor var_6299_cast_fp16_0, tensor var_6299_cast_fp16_1 = split(axis = var_6299_axis_0, split_sizes = var_6299_split_sizes_0, x = out_267_cast_fp16)[name = string("op_6299_cast_fp16")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor var_6321_to_fp16 = const()[name = string("op_6321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666055424)))]; tensor query_states_89_cast_fp16 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = var_6321_to_fp16, x = var_6299_cast_fp16_0)[name = string("query_states_89_cast_fp16")]; string key_states_89_pad_type_0 = const()[name = string("key_states_89_pad_type_0"), val = string("valid")]; tensor key_states_89_strides_0 = const()[name = string("key_states_89_strides_0"), val = tensor([1, 1])]; tensor key_states_89_pad_0 = const()[name = string("key_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_89_dilations_0 = const()[name = string("key_states_89_dilations_0"), val = tensor([1, 1])]; int32 key_states_89_groups_0 = const()[name = string("key_states_89_groups_0"), val = int32(1)]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668152640)))]; tensor key_states_89_cast_fp16 = conv(dilations = key_states_89_dilations_0, groups = key_states_89_groups_0, pad = key_states_89_pad_0, pad_type = key_states_89_pad_type_0, strides = key_states_89_strides_0, weight = var_6332_to_fp16, x = var_6299_cast_fp16_0)[name = string("key_states_89_cast_fp16")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668414848)))]; tensor value_states_89_cast_fp16 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = var_6343_to_fp16, x = var_6299_cast_fp16_0)[name = string("value_states_89_cast_fp16")]; tensor var_6351 = const()[name = string("op_6351"), val = tensor([1, 16, 64, 16])]; tensor embed_89_cast_fp16 = reshape(shape = var_6351, x = query_states_89_cast_fp16)[name = string("embed_89_cast_fp16")]; tensor var_6355 = const()[name = string("op_6355"), val = tensor([1, 2, 64, 16])]; tensor var_6356_cast_fp16 = reshape(shape = var_6355, x = key_states_89_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor embed_91_perm_0 = const()[name = string("embed_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6362 = const()[name = string("op_6362"), val = tensor([1, 2, 64, 16])]; tensor var_6363_cast_fp16 = reshape(shape = var_6362, x = value_states_89_cast_fp16)[name = string("op_6363_cast_fp16")]; tensor value_states_91_perm_0 = const()[name = string("value_states_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6367_cast_fp16 = mul(x = embed_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor var_6368_split_sizes_0 = const()[name = string("op_6368_split_sizes_0"), val = tensor([32, 32])]; int32 var_6368_axis_0 = const()[name = string("op_6368_axis_0"), val = int32(-2)]; tensor var_6368_cast_fp16_0, tensor var_6368_cast_fp16_1 = split(axis = var_6368_axis_0, split_sizes = var_6368_split_sizes_0, x = embed_89_cast_fp16)[name = string("op_6368_cast_fp16")]; fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6370_cast_fp16 = mul(x = var_6368_cast_fp16_1, y = const_227_promoted_to_fp16)[name = string("op_6370_cast_fp16")]; int32 var_6372 = const()[name = string("op_6372"), val = int32(-2)]; bool var_6373_interleave_0 = const()[name = string("op_6373_interleave_0"), val = bool(false)]; tensor var_6373_cast_fp16 = concat(axis = var_6372, interleave = var_6373_interleave_0, values = (var_6370_cast_fp16, var_6368_cast_fp16_0))[name = string("op_6373_cast_fp16")]; tensor var_6374_cast_fp16 = mul(x = var_6373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6374_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_6367_cast_fp16, y = var_6374_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor embed_91_cast_fp16 = transpose(perm = embed_91_perm_0, x = var_6356_cast_fp16)[name = string("transpose_5")]; tensor var_6377_cast_fp16 = mul(x = embed_91_cast_fp16, y = cos_cast_fp16)[name = string("op_6377_cast_fp16")]; tensor var_6378_split_sizes_0 = const()[name = string("op_6378_split_sizes_0"), val = tensor([32, 32])]; int32 var_6378_axis_0 = const()[name = string("op_6378_axis_0"), val = int32(-1)]; tensor var_6378_cast_fp16_0, tensor var_6378_cast_fp16_1 = split(axis = var_6378_axis_0, split_sizes = var_6378_split_sizes_0, x = embed_91_cast_fp16)[name = string("op_6378_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6380_cast_fp16 = mul(x = var_6378_cast_fp16_1, y = const_228_promoted_to_fp16)[name = string("op_6380_cast_fp16")]; int32 var_6382 = const()[name = string("op_6382"), val = int32(-1)]; bool var_6383_interleave_0 = const()[name = string("op_6383_interleave_0"), val = bool(false)]; tensor var_6383_cast_fp16 = concat(axis = var_6382, interleave = var_6383_interleave_0, values = (var_6380_cast_fp16, var_6378_cast_fp16_0))[name = string("op_6383_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6383_cast_fp16, y = sin_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor key_states_91_cast_fp16 = add(x = var_6377_cast_fp16, y = var_6384_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor expand_dims_221 = const()[name = string("expand_dims_221"), val = tensor([22])]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([0])]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([23])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_221, expand_dims_222, position_id, concat_179_values3_0))[name = string("concat_179")]; tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (expand_dims_224, concat_180_values1_0, var_426, concat_180_values3_0))[name = string("concat_180")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = key_states_91_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_306_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_306")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91_cast_fp16 = transpose(perm = value_states_91_perm_0, x = var_6363_cast_fp16)[name = string("transpose_4")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = value_states_91_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_307_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_307")]; tensor var_6427_begin_0 = const()[name = string("op_6427_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6427_end_0 = const()[name = string("op_6427_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6427_end_mask_0 = const()[name = string("op_6427_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6427_cast_fp16 = slice_by_index(begin = var_6427_begin_0, end = var_6427_end_0, end_mask = var_6427_end_mask_0, x = coreml_update_state_92)[name = string("op_6427_cast_fp16")]; tensor tile_44 = const()[name = string("tile_44"), val = tensor([1, 1])]; int32 var_6430_axis_0 = const()[name = string("op_6430_axis_0"), val = int32(1)]; tensor var_6430_cast_fp16_0, tensor var_6430_cast_fp16_1 = split(axis = var_6430_axis_0, split_sizes = tile_44, x = var_6427_cast_fp16)[name = string("op_6430_cast_fp16")]; tensor var_6437_begin_0 = const()[name = string("op_6437_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6437_end_0 = const()[name = string("op_6437_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6437_end_mask_0 = const()[name = string("op_6437_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = coreml_update_state_93)[name = string("op_6437_cast_fp16")]; tensor tile_45 = const()[name = string("tile_45"), val = tensor([1, 1])]; int32 var_6440_axis_0 = const()[name = string("op_6440_axis_0"), val = int32(1)]; tensor var_6440_cast_fp16_0, tensor var_6440_cast_fp16_1 = split(axis = var_6440_axis_0, split_sizes = tile_45, x = var_6437_cast_fp16)[name = string("op_6440_cast_fp16")]; tensor var_6443_split_sizes_0 = const()[name = string("op_6443_split_sizes_0"), val = tensor([8, 8])]; int32 var_6443_axis_0 = const()[name = string("op_6443_axis_0"), val = int32(1)]; tensor var_6443_cast_fp16_0, tensor var_6443_cast_fp16_1 = split(axis = var_6443_axis_0, split_sizes = var_6443_split_sizes_0, x = query_states_91_cast_fp16)[name = string("op_6443_cast_fp16")]; bool attn_weights_353_transpose_x_0 = const()[name = string("attn_weights_353_transpose_x_0"), val = bool(false)]; bool attn_weights_353_transpose_y_0 = const()[name = string("attn_weights_353_transpose_y_0"), val = bool(false)]; tensor attn_weights_353_cast_fp16 = matmul(transpose_x = attn_weights_353_transpose_x_0, transpose_y = attn_weights_353_transpose_y_0, x = var_6430_cast_fp16_0, y = var_6443_cast_fp16_0)[name = string("attn_weights_353_cast_fp16")]; fp16 _inversed_attn_weights_355_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_355_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_355_cast_fp16 = mul(x = attn_weights_353_cast_fp16, y = _inversed_attn_weights_355_y_0_to_fp16)[name = string("_inversed_attn_weights_355_cast_fp16")]; tensor attn_weights_357_cast_fp16 = add(x = _inversed_attn_weights_355_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_357_cast_fp16")]; int32 var_6450 = const()[name = string("op_6450"), val = int32(2)]; tensor attn_weights_359_cast_fp16 = softmax(axis = var_6450, x = attn_weights_357_cast_fp16)[name = string("attn_weights_359_cast_fp16")]; bool var_6456_transpose_x_1 = const()[name = string("op_6456_transpose_x_1"), val = bool(true)]; bool var_6456_transpose_y_1 = const()[name = string("op_6456_transpose_y_1"), val = bool(false)]; tensor var_6456_cast_fp16 = matmul(transpose_x = var_6456_transpose_x_1, transpose_y = var_6456_transpose_y_1, x = attn_weights_359_cast_fp16, y = var_6440_cast_fp16_0)[name = string("op_6456_cast_fp16")]; bool attn_weights_361_transpose_x_0 = const()[name = string("attn_weights_361_transpose_x_0"), val = bool(false)]; bool attn_weights_361_transpose_y_0 = const()[name = string("attn_weights_361_transpose_y_0"), val = bool(false)]; tensor attn_weights_361_cast_fp16 = matmul(transpose_x = attn_weights_361_transpose_x_0, transpose_y = attn_weights_361_transpose_y_0, x = var_6430_cast_fp16_1, y = var_6443_cast_fp16_1)[name = string("attn_weights_361_cast_fp16")]; fp16 _inversed_attn_weights_363_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_363_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_363_cast_fp16 = mul(x = attn_weights_361_cast_fp16, y = _inversed_attn_weights_363_y_0_to_fp16)[name = string("_inversed_attn_weights_363_cast_fp16")]; tensor attn_weights_365_cast_fp16 = add(x = _inversed_attn_weights_363_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_365_cast_fp16")]; int32 var_6462 = const()[name = string("op_6462"), val = int32(2)]; tensor attn_weights_367_cast_fp16 = softmax(axis = var_6462, x = attn_weights_365_cast_fp16)[name = string("attn_weights_367_cast_fp16")]; bool attn_output_133_transpose_x_1 = const()[name = string("attn_output_133_transpose_x_1"), val = bool(true)]; bool attn_output_133_transpose_y_1 = const()[name = string("attn_output_133_transpose_y_1"), val = bool(false)]; tensor attn_output_133_cast_fp16 = matmul(transpose_x = attn_output_133_transpose_x_1, transpose_y = attn_output_133_transpose_y_1, x = attn_weights_367_cast_fp16, y = var_6440_cast_fp16_1)[name = string("attn_output_133_cast_fp16")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(1)]; bool attn_output_135_interleave_0 = const()[name = string("attn_output_135_interleave_0"), val = bool(false)]; tensor attn_output_135_cast_fp16 = concat(axis = var_6470, interleave = attn_output_135_interleave_0, values = (var_6456_cast_fp16, attn_output_133_cast_fp16))[name = string("attn_output_135_cast_fp16")]; tensor var_6474_perm_0 = const()[name = string("op_6474_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6479 = const()[name = string("op_6479"), val = tensor([1, 1024, 1, 16])]; tensor var_6474_cast_fp16 = transpose(perm = var_6474_perm_0, x = attn_output_135_cast_fp16)[name = string("transpose_3")]; tensor x_401_cast_fp16 = reshape(shape = var_6479, x = var_6474_cast_fp16)[name = string("x_401_cast_fp16")]; string hidden_states_135_pad_type_0 = const()[name = string("hidden_states_135_pad_type_0"), val = string("valid")]; tensor hidden_states_135_strides_0 = const()[name = string("hidden_states_135_strides_0"), val = tensor([1, 1])]; tensor hidden_states_135_pad_0 = const()[name = string("hidden_states_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_135_dilations_0 = const()[name = string("hidden_states_135_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_135_groups_0 = const()[name = string("hidden_states_135_groups_0"), val = int32(1)]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668677056)))]; tensor hidden_states_135_cast_fp16 = conv(dilations = hidden_states_135_dilations_0, groups = hidden_states_135_groups_0, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = hidden_states_135_strides_0, weight = var_6486_to_fp16, x = x_401_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor x_403_cast_fp16 = add(x = x_395_cast_fp16, y = hidden_states_135_cast_fp16)[name = string("x_403_cast_fp16")]; int32 var_6498 = const()[name = string("op_6498"), val = int32(1)]; fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6501_cast_fp16 = mul(x = x_403_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_6501_cast_fp16")]; bool x_405_interleave_0 = const()[name = string("x_405_interleave_0"), val = bool(false)]; tensor x_405_cast_fp16 = concat(axis = var_6498, interleave = x_405_interleave_0, values = (x_403_cast_fp16, var_6501_cast_fp16))[name = string("x_405_cast_fp16")]; tensor out_271_axes_0 = const()[name = string("out_271_axes_0"), val = tensor([1])]; fp16 var_6511_to_fp16 = const()[name = string("op_6511_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_271_cast_fp16 = layer_norm(axes = out_271_axes_0, epsilon = var_6511_to_fp16, x = x_405_cast_fp16)[name = string("out_271_cast_fp16")]; tensor layer_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670774272)))]; tensor out_273_cast_fp16 = mul(x = out_271_cast_fp16, y = layer_layers_22_post_attention_layernorm_weight_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_6517_split_sizes_0 = const()[name = string("op_6517_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6517_axis_0 = const()[name = string("op_6517_axis_0"), val = int32(1)]; tensor var_6517_cast_fp16_0, tensor var_6517_cast_fp16_1 = split(axis = var_6517_axis_0, split_sizes = var_6517_split_sizes_0, x = out_273_cast_fp16)[name = string("op_6517_cast_fp16")]; string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670778432)))]; tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = var_6522_to_fp16, x = var_6517_cast_fp16_0)[name = string("input_45_cast_fp16")]; tensor var_6533_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_6533_cast_fp16")]; string var_6538_pad_type_0 = const()[name = string("op_6538_pad_type_0"), val = string("valid")]; tensor var_6538_strides_0 = const()[name = string("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = string("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = string("op_6538_dilations_0"), val = tensor([1, 1])]; int32 var_6538_groups_0 = const()[name = string("op_6538_groups_0"), val = int32(1)]; tensor var_6521_to_fp16 = const()[name = string("op_6521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679167104)))]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = var_6521_to_fp16, x = var_6517_cast_fp16_0)[name = string("op_6538_cast_fp16")]; tensor x_411_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6538_cast_fp16)[name = string("x_411_cast_fp16")]; string hidden_states_137_pad_type_0 = const()[name = string("hidden_states_137_pad_type_0"), val = string("valid")]; tensor hidden_states_137_strides_0 = const()[name = string("hidden_states_137_strides_0"), val = tensor([1, 1])]; tensor hidden_states_137_pad_0 = const()[name = string("hidden_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_137_dilations_0 = const()[name = string("hidden_states_137_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_137_groups_0 = const()[name = string("hidden_states_137_groups_0"), val = int32(1)]; tensor var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687555776)))]; tensor hidden_states_137_cast_fp16 = conv(dilations = hidden_states_137_dilations_0, groups = hidden_states_137_groups_0, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = hidden_states_137_strides_0, weight = var_6520_to_fp16, x = x_411_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor x_413_cast_fp16 = add(x = x_403_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("x_413_cast_fp16")]; int32 var_6551 = const()[name = string("op_6551"), val = int32(1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6554_cast_fp16 = mul(x = x_413_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; bool x_415_interleave_0 = const()[name = string("x_415_interleave_0"), val = bool(false)]; tensor x_415_cast_fp16 = concat(axis = var_6551, interleave = x_415_interleave_0, values = (x_413_cast_fp16, var_6554_cast_fp16))[name = string("x_415_cast_fp16")]; tensor out_277_axes_0 = const()[name = string("out_277_axes_0"), val = tensor([1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_277_cast_fp16 = layer_norm(axes = out_277_axes_0, epsilon = var_6564_to_fp16, x = x_415_cast_fp16)[name = string("out_277_cast_fp16")]; tensor layer_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695944448)))]; tensor out_279_cast_fp16 = mul(x = out_277_cast_fp16, y = layer_layers_23_input_layernorm_weight_to_fp16)[name = string("out_279_cast_fp16")]; tensor var_6570_split_sizes_0 = const()[name = string("op_6570_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6570_axis_0 = const()[name = string("op_6570_axis_0"), val = int32(1)]; tensor var_6570_cast_fp16_0, tensor var_6570_cast_fp16_1 = split(axis = var_6570_axis_0, split_sizes = var_6570_split_sizes_0, x = out_279_cast_fp16)[name = string("op_6570_cast_fp16")]; string query_states_93_pad_type_0 = const()[name = string("query_states_93_pad_type_0"), val = string("valid")]; tensor query_states_93_strides_0 = const()[name = string("query_states_93_strides_0"), val = tensor([1, 1])]; tensor query_states_93_pad_0 = const()[name = string("query_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_93_dilations_0 = const()[name = string("query_states_93_dilations_0"), val = tensor([1, 1])]; int32 query_states_93_groups_0 = const()[name = string("query_states_93_groups_0"), val = int32(1)]; tensor var_6592_to_fp16 = const()[name = string("op_6592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695948608)))]; tensor query_states_93_cast_fp16 = conv(dilations = query_states_93_dilations_0, groups = query_states_93_groups_0, pad = query_states_93_pad_0, pad_type = query_states_93_pad_type_0, strides = query_states_93_strides_0, weight = var_6592_to_fp16, x = var_6570_cast_fp16_0)[name = string("query_states_93_cast_fp16")]; string key_states_93_pad_type_0 = const()[name = string("key_states_93_pad_type_0"), val = string("valid")]; tensor key_states_93_strides_0 = const()[name = string("key_states_93_strides_0"), val = tensor([1, 1])]; tensor key_states_93_pad_0 = const()[name = string("key_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_93_dilations_0 = const()[name = string("key_states_93_dilations_0"), val = tensor([1, 1])]; int32 key_states_93_groups_0 = const()[name = string("key_states_93_groups_0"), val = int32(1)]; tensor var_6603_to_fp16 = const()[name = string("op_6603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698045824)))]; tensor key_states_93_cast_fp16 = conv(dilations = key_states_93_dilations_0, groups = key_states_93_groups_0, pad = key_states_93_pad_0, pad_type = key_states_93_pad_type_0, strides = key_states_93_strides_0, weight = var_6603_to_fp16, x = var_6570_cast_fp16_0)[name = string("key_states_93_cast_fp16")]; string value_states_93_pad_type_0 = const()[name = string("value_states_93_pad_type_0"), val = string("valid")]; tensor value_states_93_strides_0 = const()[name = string("value_states_93_strides_0"), val = tensor([1, 1])]; tensor value_states_93_pad_0 = const()[name = string("value_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_93_dilations_0 = const()[name = string("value_states_93_dilations_0"), val = tensor([1, 1])]; int32 value_states_93_groups_0 = const()[name = string("value_states_93_groups_0"), val = int32(1)]; tensor var_6614_to_fp16 = const()[name = string("op_6614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698308032)))]; tensor value_states_93_cast_fp16 = conv(dilations = value_states_93_dilations_0, groups = value_states_93_groups_0, pad = value_states_93_pad_0, pad_type = value_states_93_pad_type_0, strides = value_states_93_strides_0, weight = var_6614_to_fp16, x = var_6570_cast_fp16_0)[name = string("value_states_93_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([1, 16, 64, 16])]; tensor embed_93_cast_fp16 = reshape(shape = var_6622, x = query_states_93_cast_fp16)[name = string("embed_93_cast_fp16")]; tensor var_6626 = const()[name = string("op_6626"), val = tensor([1, 2, 64, 16])]; tensor var_6627_cast_fp16 = reshape(shape = var_6626, x = key_states_93_cast_fp16)[name = string("op_6627_cast_fp16")]; tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([1, 2, 64, 16])]; tensor var_6634_cast_fp16 = reshape(shape = var_6633, x = value_states_93_cast_fp16)[name = string("op_6634_cast_fp16")]; tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6638_cast_fp16 = mul(x = embed_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6638_cast_fp16")]; tensor var_6639_split_sizes_0 = const()[name = string("op_6639_split_sizes_0"), val = tensor([32, 32])]; int32 var_6639_axis_0 = const()[name = string("op_6639_axis_0"), val = int32(-2)]; tensor var_6639_cast_fp16_0, tensor var_6639_cast_fp16_1 = split(axis = var_6639_axis_0, split_sizes = var_6639_split_sizes_0, x = embed_93_cast_fp16)[name = string("op_6639_cast_fp16")]; fp16 const_237_promoted_to_fp16 = const()[name = string("const_237_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6641_cast_fp16 = mul(x = var_6639_cast_fp16_1, y = const_237_promoted_to_fp16)[name = string("op_6641_cast_fp16")]; int32 var_6643 = const()[name = string("op_6643"), val = int32(-2)]; bool var_6644_interleave_0 = const()[name = string("op_6644_interleave_0"), val = bool(false)]; tensor var_6644_cast_fp16 = concat(axis = var_6643, interleave = var_6644_interleave_0, values = (var_6641_cast_fp16, var_6639_cast_fp16_0))[name = string("op_6644_cast_fp16")]; tensor var_6645_cast_fp16 = mul(x = var_6644_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6645_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_6638_cast_fp16, y = var_6645_cast_fp16)[name = string("query_states_cast_fp16")]; tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_6627_cast_fp16)[name = string("transpose_2")]; tensor var_6648_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_6648_cast_fp16")]; tensor var_6649_split_sizes_0 = const()[name = string("op_6649_split_sizes_0"), val = tensor([32, 32])]; int32 var_6649_axis_0 = const()[name = string("op_6649_axis_0"), val = int32(-1)]; tensor var_6649_cast_fp16_0, tensor var_6649_cast_fp16_1 = split(axis = var_6649_axis_0, split_sizes = var_6649_split_sizes_0, x = embed_cast_fp16)[name = string("op_6649_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6651_cast_fp16 = mul(x = var_6649_cast_fp16_1, y = const_238_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; int32 var_6653 = const()[name = string("op_6653"), val = int32(-1)]; bool var_6654_interleave_0 = const()[name = string("op_6654_interleave_0"), val = bool(false)]; tensor var_6654_cast_fp16 = concat(axis = var_6653, interleave = var_6654_interleave_0, values = (var_6651_cast_fp16, var_6649_cast_fp16_0))[name = string("op_6654_cast_fp16")]; tensor var_6655_cast_fp16 = mul(x = var_6654_cast_fp16, y = sin_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor key_states_cast_fp16 = add(x = var_6648_cast_fp16, y = var_6655_cast_fp16)[name = string("key_states_cast_fp16")]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([23])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([0])]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([24])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_231, expand_dims_232, position_id, concat_187_values3_0))[name = string("concat_187")]; tensor concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = tensor([0])]; tensor concat_188_values3_0 = const()[name = string("concat_188_values3_0"), val = tensor([0])]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (expand_dims_234, concat_188_values1_0, var_426, concat_188_values3_0))[name = string("concat_188")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = key_states_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_308_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_308")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_6634_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = value_states_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_309_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_309")]; tensor var_6698_begin_0 = const()[name = string("op_6698_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6698_end_0 = const()[name = string("op_6698_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6698_end_mask_0 = const()[name = string("op_6698_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6698_cast_fp16 = slice_by_index(begin = var_6698_begin_0, end = var_6698_end_0, end_mask = var_6698_end_mask_0, x = coreml_update_state_94)[name = string("op_6698_cast_fp16")]; tensor tile_46 = const()[name = string("tile_46"), val = tensor([1, 1])]; int32 var_6701_axis_0 = const()[name = string("op_6701_axis_0"), val = int32(1)]; tensor var_6701_cast_fp16_0, tensor var_6701_cast_fp16_1 = split(axis = var_6701_axis_0, split_sizes = tile_46, x = var_6698_cast_fp16)[name = string("op_6701_cast_fp16")]; tensor var_6708_begin_0 = const()[name = string("op_6708_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6708_end_0 = const()[name = string("op_6708_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6708_end_mask_0 = const()[name = string("op_6708_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6708_cast_fp16 = slice_by_index(begin = var_6708_begin_0, end = var_6708_end_0, end_mask = var_6708_end_mask_0, x = coreml_update_state_95)[name = string("op_6708_cast_fp16")]; tensor tile_47 = const()[name = string("tile_47"), val = tensor([1, 1])]; int32 var_6711_axis_0 = const()[name = string("op_6711_axis_0"), val = int32(1)]; tensor var_6711_cast_fp16_0, tensor var_6711_cast_fp16_1 = split(axis = var_6711_axis_0, split_sizes = tile_47, x = var_6708_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor var_6714_split_sizes_0 = const()[name = string("op_6714_split_sizes_0"), val = tensor([8, 8])]; int32 var_6714_axis_0 = const()[name = string("op_6714_axis_0"), val = int32(1)]; tensor var_6714_cast_fp16_0, tensor var_6714_cast_fp16_1 = split(axis = var_6714_axis_0, split_sizes = var_6714_split_sizes_0, x = query_states_cast_fp16)[name = string("op_6714_cast_fp16")]; bool attn_weights_369_transpose_x_0 = const()[name = string("attn_weights_369_transpose_x_0"), val = bool(false)]; bool attn_weights_369_transpose_y_0 = const()[name = string("attn_weights_369_transpose_y_0"), val = bool(false)]; tensor attn_weights_369_cast_fp16 = matmul(transpose_x = attn_weights_369_transpose_x_0, transpose_y = attn_weights_369_transpose_y_0, x = var_6701_cast_fp16_0, y = var_6714_cast_fp16_0)[name = string("attn_weights_369_cast_fp16")]; fp16 _inversed_attn_weights_371_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_371_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_371_cast_fp16 = mul(x = attn_weights_369_cast_fp16, y = _inversed_attn_weights_371_y_0_to_fp16)[name = string("_inversed_attn_weights_371_cast_fp16")]; tensor attn_weights_373_cast_fp16 = add(x = _inversed_attn_weights_371_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_373_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(2)]; tensor attn_weights_375_cast_fp16 = softmax(axis = var_6721, x = attn_weights_373_cast_fp16)[name = string("attn_weights_375_cast_fp16")]; bool var_6727_transpose_x_1 = const()[name = string("op_6727_transpose_x_1"), val = bool(true)]; bool var_6727_transpose_y_1 = const()[name = string("op_6727_transpose_y_1"), val = bool(false)]; tensor var_6727_cast_fp16 = matmul(transpose_x = var_6727_transpose_x_1, transpose_y = var_6727_transpose_y_1, x = attn_weights_375_cast_fp16, y = var_6711_cast_fp16_0)[name = string("op_6727_cast_fp16")]; bool attn_weights_377_transpose_x_0 = const()[name = string("attn_weights_377_transpose_x_0"), val = bool(false)]; bool attn_weights_377_transpose_y_0 = const()[name = string("attn_weights_377_transpose_y_0"), val = bool(false)]; tensor attn_weights_377_cast_fp16 = matmul(transpose_x = attn_weights_377_transpose_x_0, transpose_y = attn_weights_377_transpose_y_0, x = var_6701_cast_fp16_1, y = var_6714_cast_fp16_1)[name = string("attn_weights_377_cast_fp16")]; fp16 _inversed_attn_weights_379_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_379_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_379_cast_fp16 = mul(x = attn_weights_377_cast_fp16, y = _inversed_attn_weights_379_y_0_to_fp16)[name = string("_inversed_attn_weights_379_cast_fp16")]; tensor attn_weights_381_cast_fp16 = add(x = _inversed_attn_weights_379_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_381_cast_fp16")]; int32 var_6733 = const()[name = string("op_6733"), val = int32(2)]; tensor attn_weights_cast_fp16 = softmax(axis = var_6733, x = attn_weights_381_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_139_transpose_x_1 = const()[name = string("attn_output_139_transpose_x_1"), val = bool(true)]; bool attn_output_139_transpose_y_1 = const()[name = string("attn_output_139_transpose_y_1"), val = bool(false)]; tensor attn_output_139_cast_fp16 = matmul(transpose_x = attn_output_139_transpose_x_1, transpose_y = attn_output_139_transpose_y_1, x = attn_weights_cast_fp16, y = var_6711_cast_fp16_1)[name = string("attn_output_139_cast_fp16")]; int32 var_6741 = const()[name = string("op_6741"), val = int32(1)]; bool attn_output_141_interleave_0 = const()[name = string("attn_output_141_interleave_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = concat(axis = var_6741, interleave = attn_output_141_interleave_0, values = (var_6727_cast_fp16, attn_output_139_cast_fp16))[name = string("attn_output_141_cast_fp16")]; tensor var_6745_perm_0 = const()[name = string("op_6745_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 1024, 1, 16])]; tensor var_6745_cast_fp16 = transpose(perm = var_6745_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_0")]; tensor x_419_cast_fp16 = reshape(shape = var_6750, x = var_6745_cast_fp16)[name = string("x_419_cast_fp16")]; string hidden_states_141_pad_type_0 = const()[name = string("hidden_states_141_pad_type_0"), val = string("valid")]; tensor hidden_states_141_strides_0 = const()[name = string("hidden_states_141_strides_0"), val = tensor([1, 1])]; tensor hidden_states_141_pad_0 = const()[name = string("hidden_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_141_dilations_0 = const()[name = string("hidden_states_141_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_141_groups_0 = const()[name = string("hidden_states_141_groups_0"), val = int32(1)]; tensor var_6757_to_fp16 = const()[name = string("op_6757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698570240)))]; tensor hidden_states_141_cast_fp16 = conv(dilations = hidden_states_141_dilations_0, groups = hidden_states_141_groups_0, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = hidden_states_141_strides_0, weight = var_6757_to_fp16, x = x_419_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor x_421_cast_fp16 = add(x = x_413_cast_fp16, y = hidden_states_141_cast_fp16)[name = string("x_421_cast_fp16")]; int32 var_6769 = const()[name = string("op_6769"), val = int32(1)]; fp16 const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6772_cast_fp16 = mul(x = x_421_cast_fp16, y = const_243_promoted_to_fp16)[name = string("op_6772_cast_fp16")]; bool x_423_interleave_0 = const()[name = string("x_423_interleave_0"), val = bool(false)]; tensor x_423_cast_fp16 = concat(axis = var_6769, interleave = x_423_interleave_0, values = (x_421_cast_fp16, var_6772_cast_fp16))[name = string("x_423_cast_fp16")]; tensor out_283_axes_0 = const()[name = string("out_283_axes_0"), val = tensor([1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_283_cast_fp16 = layer_norm(axes = out_283_axes_0, epsilon = var_6782_to_fp16, x = x_423_cast_fp16)[name = string("out_283_cast_fp16")]; tensor layer_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700667456)))]; tensor out_285_cast_fp16 = mul(x = out_283_cast_fp16, y = layer_layers_23_post_attention_layernorm_weight_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_6788_split_sizes_0 = const()[name = string("op_6788_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6788_axis_0 = const()[name = string("op_6788_axis_0"), val = int32(1)]; tensor var_6788_cast_fp16_0, tensor var_6788_cast_fp16_1 = split(axis = var_6788_axis_0, split_sizes = var_6788_split_sizes_0, x = out_285_cast_fp16)[name = string("op_6788_cast_fp16")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor var_6793_to_fp16 = const()[name = string("op_6793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700671616)))]; tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_6793_to_fp16, x = var_6788_cast_fp16_0)[name = string("input_cast_fp16")]; tensor var_6804_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_6804_cast_fp16")]; string var_6809_pad_type_0 = const()[name = string("op_6809_pad_type_0"), val = string("valid")]; tensor var_6809_strides_0 = const()[name = string("op_6809_strides_0"), val = tensor([1, 1])]; tensor var_6809_pad_0 = const()[name = string("op_6809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6809_dilations_0 = const()[name = string("op_6809_dilations_0"), val = tensor([1, 1])]; int32 var_6809_groups_0 = const()[name = string("op_6809_groups_0"), val = int32(1)]; tensor var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709060288)))]; tensor var_6809_cast_fp16 = conv(dilations = var_6809_dilations_0, groups = var_6809_groups_0, pad = var_6809_pad_0, pad_type = var_6809_pad_type_0, strides = var_6809_strides_0, weight = var_6792_to_fp16, x = var_6788_cast_fp16_0)[name = string("op_6809_cast_fp16")]; tensor x_429_cast_fp16 = mul(x = var_6804_cast_fp16, y = var_6809_cast_fp16)[name = string("x_429_cast_fp16")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor var_6791_to_fp16 = const()[name = string("op_6791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717448960)))]; tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_6791_to_fp16, x = x_429_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor x_431_cast_fp16 = add(x = x_421_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_431_cast_fp16")]; int32 var_6822 = const()[name = string("op_6822"), val = int32(1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6825_cast_fp16 = mul(x = x_431_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_6825_cast_fp16")]; bool x_433_interleave_0 = const()[name = string("x_433_interleave_0"), val = bool(false)]; tensor x_433_cast_fp16 = concat(axis = var_6822, interleave = x_433_interleave_0, values = (x_431_cast_fp16, var_6825_cast_fp16))[name = string("x_433_cast_fp16")]; tensor out_289_axes_0 = const()[name = string("out_289_axes_0"), val = tensor([1])]; fp16 var_6835_to_fp16 = const()[name = string("op_6835_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_289_cast_fp16 = layer_norm(axes = out_289_axes_0, epsilon = var_6835_to_fp16, x = x_433_cast_fp16)[name = string("out_289_cast_fp16")]; tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725837632)))]; tensor out_291_cast_fp16 = mul(x = out_289_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_291_cast_fp16")]; tensor var_6841_split_sizes_0 = const()[name = string("op_6841_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6841_axis_0 = const()[name = string("op_6841_axis_0"), val = int32(1)]; tensor output, tensor var_6841_cast_fp16_1 = split(axis = var_6841_axis_0, split_sizes = var_6841_split_sizes_0, x = out_291_cast_fp16)[name = string("op_6841_cast_fp16")]; } -> (output); func length_32(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { tensor var_260 = const()[name = string("op_260"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726465280)))]; tensor position_ids_1 = add(x = var_260, y = position_id)[name = string("position_ids_1")]; int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; tensor var_285 = const()[name = string("op_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; tensor var_292_axes_0 = const()[name = string("op_292_axes_0"), val = tensor([2])]; tensor var_292 = expand_dims(axes = var_292_axes_0, x = position_ids_1)[name = string("op_292")]; tensor var_293 = greater(x = var_285, y = var_292)[name = string("op_293")]; tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_293_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_293)[name = string("cast_245")]; tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_293_to_fp16)[name = string("attention_mask_3_cast_fp16")]; fp16 var_301_promoted_to_fp16 = const()[name = string("op_301_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_302_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_301_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_303_after_broadcast_to_fp16 = const()[name = string("op_303_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726465472)))]; tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_303_after_broadcast_to_fp16, cond = var_302_cast_fp16)[name = string("attention_mask_cast_fp16")]; tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; int32 var_318 = const()[name = string("op_318"), val = int32(1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_321_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_321_cast_fp16")]; bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; tensor x_1_cast_fp16 = concat(axis = var_318, interleave = x_1_interleave_0, values = (inputs_embeds, var_321_cast_fp16))[name = string("x_1_cast_fp16")]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_331_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)]; tensor var_337_cast_fp16_0, tensor var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = out_3_cast_fp16)[name = string("op_337_cast_fp16")]; tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([32])]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor var_359_to_fp16 = const()[name = string("op_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_359_to_fp16, x = var_337_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor var_370_to_fp16 = const()[name = string("op_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_370_to_fp16, x = var_337_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_381_to_fp16, x = var_337_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 16, 64, 32])]; tensor embed_1_cast_fp16 = reshape(shape = var_389, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 2, 64, 32])]; tensor var_394_cast_fp16 = reshape(shape = var_393, x = key_states_1_cast_fp16)[name = string("op_394_cast_fp16")]; tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_400 = const()[name = string("op_400"), val = tensor([1, 2, 64, 32])]; tensor var_401_cast_fp16 = reshape(shape = var_400, x = value_states_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_74")]; tensor var_405_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_405_cast_fp16")]; tensor var_406_split_sizes_0 = const()[name = string("op_406_split_sizes_0"), val = tensor([32, 32])]; int32 var_406_axis_0 = const()[name = string("op_406_axis_0"), val = int32(-2)]; tensor var_406_cast_fp16_0, tensor var_406_cast_fp16_1 = split(axis = var_406_axis_0, split_sizes = var_406_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_406_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_408_cast_fp16 = mul(x = var_406_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_408_cast_fp16")]; int32 var_410 = const()[name = string("op_410"), val = int32(-2)]; bool var_411_interleave_0 = const()[name = string("op_411_interleave_0"), val = bool(false)]; tensor var_411_cast_fp16 = concat(axis = var_410, interleave = var_411_interleave_0, values = (var_408_cast_fp16, var_406_cast_fp16_0))[name = string("op_411_cast_fp16")]; tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_73")]; tensor var_412_cast_fp16 = mul(x = var_411_cast_fp16, y = sin_1_cast_fp16)[name = string("op_412_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_405_cast_fp16, y = var_412_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_394_cast_fp16)[name = string("transpose_71")]; tensor var_415_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_split_sizes_0 = const()[name = string("op_416_split_sizes_0"), val = tensor([32, 32])]; int32 var_416_axis_0 = const()[name = string("op_416_axis_0"), val = int32(-1)]; tensor var_416_cast_fp16_0, tensor var_416_cast_fp16_1 = split(axis = var_416_axis_0, split_sizes = var_416_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_416_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_418_cast_fp16 = mul(x = var_416_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_418_cast_fp16")]; int32 var_420 = const()[name = string("op_420"), val = int32(-1)]; bool var_421_interleave_0 = const()[name = string("op_421_interleave_0"), val = bool(false)]; tensor var_421_cast_fp16 = concat(axis = var_420, interleave = var_421_interleave_0, values = (var_418_cast_fp16, var_416_cast_fp16_0))[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = mul(x = var_421_cast_fp16, y = sin_cast_fp16)[name = string("op_422_cast_fp16")]; tensor key_states_3_cast_fp16 = add(x = var_415_cast_fp16, y = var_422_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_426 = add(x = position_id, y = q_len_1)[name = string("op_426")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_426, concat_4_values3_0))[name = string("concat_4")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_310_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_310")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_401_cast_fp16)[name = string("transpose_70")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_311_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_311")]; tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_48)[name = string("op_465_cast_fp16")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; int32 var_468_axis_0 = const()[name = string("op_468_axis_0"), val = int32(1)]; tensor var_468_cast_fp16_0, tensor var_468_cast_fp16_1 = split(axis = var_468_axis_0, split_sizes = tile_0, x = var_465_cast_fp16)[name = string("op_468_cast_fp16")]; tensor var_475_begin_0 = const()[name = string("op_475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_475_end_0 = const()[name = string("op_475_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_475_end_mask_0 = const()[name = string("op_475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_475_cast_fp16 = slice_by_index(begin = var_475_begin_0, end = var_475_end_0, end_mask = var_475_end_mask_0, x = coreml_update_state_49)[name = string("op_475_cast_fp16")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; int32 var_478_axis_0 = const()[name = string("op_478_axis_0"), val = int32(1)]; tensor var_478_cast_fp16_0, tensor var_478_cast_fp16_1 = split(axis = var_478_axis_0, split_sizes = tile_1, x = var_475_cast_fp16)[name = string("op_478_cast_fp16")]; tensor var_481_split_sizes_0 = const()[name = string("op_481_split_sizes_0"), val = tensor([8, 8])]; int32 var_481_axis_0 = const()[name = string("op_481_axis_0"), val = int32(1)]; tensor var_481_cast_fp16_0, tensor var_481_cast_fp16_1 = split(axis = var_481_axis_0, split_sizes = var_481_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_481_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_468_cast_fp16_0, y = var_481_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_72")]; tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; int32 var_488 = const()[name = string("op_488"), val = int32(2)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_488, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool var_494_transpose_x_1 = const()[name = string("op_494_transpose_x_1"), val = bool(true)]; bool var_494_transpose_y_1 = const()[name = string("op_494_transpose_y_1"), val = bool(false)]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_1, transpose_y = var_494_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_478_cast_fp16_0)[name = string("op_494_cast_fp16")]; bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_468_cast_fp16_1, y = var_481_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; int32 var_500 = const()[name = string("op_500"), val = int32(2)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_500, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_478_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; int32 var_508 = const()[name = string("op_508"), val = int32(1)]; bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; tensor attn_output_3_cast_fp16 = concat(axis = var_508, interleave = attn_output_3_interleave_0, values = (var_494_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; tensor var_512_perm_0 = const()[name = string("op_512_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 1024, 1, 32])]; tensor var_512_cast_fp16 = transpose(perm = var_512_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_69")]; tensor x_5_cast_fp16 = reshape(shape = var_517, x = var_512_cast_fp16)[name = string("x_5_cast_fp16")]; string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_524_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; int32 var_536 = const()[name = string("op_536"), val = int32(1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_539_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_539_cast_fp16")]; bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; tensor x_9_cast_fp16 = concat(axis = var_536, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_539_cast_fp16))[name = string("x_9_cast_fp16")]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_549_to_fp16 = const()[name = string("op_549_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_549_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(1)]; tensor var_555_cast_fp16_0, tensor var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = out_9_cast_fp16)[name = string("op_555_cast_fp16")]; string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_560_to_fp16, x = var_555_cast_fp16_0)[name = string("input_1_cast_fp16")]; tensor var_571_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_571_cast_fp16")]; string var_576_pad_type_0 = const()[name = string("op_576_pad_type_0"), val = string("valid")]; tensor var_576_strides_0 = const()[name = string("op_576_strides_0"), val = tensor([1, 1])]; tensor var_576_pad_0 = const()[name = string("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_dilations_0 = const()[name = string("op_576_dilations_0"), val = tensor([1, 1])]; int32 var_576_groups_0 = const()[name = string("op_576_groups_0"), val = int32(1)]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; tensor var_576_cast_fp16 = conv(dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = var_559_to_fp16, x = var_555_cast_fp16_0)[name = string("op_576_cast_fp16")]; tensor x_15_cast_fp16 = mul(x = var_571_cast_fp16, y = var_576_cast_fp16)[name = string("x_15_cast_fp16")]; string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; tensor var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_558_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_589 = const()[name = string("op_589"), val = int32(1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_592_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_592_cast_fp16")]; bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; tensor x_19_cast_fp16 = concat(axis = var_589, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_592_cast_fp16))[name = string("x_19_cast_fp16")]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_602_to_fp16 = const()[name = string("op_602_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_602_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_608_split_sizes_0 = const()[name = string("op_608_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_608_axis_0 = const()[name = string("op_608_axis_0"), val = int32(1)]; tensor var_608_cast_fp16_0, tensor var_608_cast_fp16_1 = split(axis = var_608_axis_0, split_sizes = var_608_split_sizes_0, x = out_15_cast_fp16)[name = string("op_608_cast_fp16")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_630_to_fp16, x = var_608_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_641_to_fp16, x = var_608_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_652_to_fp16, x = var_608_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 64, 32])]; tensor embed_5_cast_fp16 = reshape(shape = var_660, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 2, 64, 32])]; tensor var_665_cast_fp16 = reshape(shape = var_664, x = key_states_5_cast_fp16)[name = string("op_665_cast_fp16")]; tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 2, 64, 32])]; tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_states_5_cast_fp16)[name = string("op_672_cast_fp16")]; tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_676_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_677_split_sizes_0 = const()[name = string("op_677_split_sizes_0"), val = tensor([32, 32])]; int32 var_677_axis_0 = const()[name = string("op_677_axis_0"), val = int32(-2)]; tensor var_677_cast_fp16_0, tensor var_677_cast_fp16_1 = split(axis = var_677_axis_0, split_sizes = var_677_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_677_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_679_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-2)]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682_cast_fp16 = concat(axis = var_681, interleave = var_682_interleave_0, values = (var_679_cast_fp16, var_677_cast_fp16_0))[name = string("op_682_cast_fp16")]; tensor var_683_cast_fp16 = mul(x = var_682_cast_fp16, y = sin_1_cast_fp16)[name = string("op_683_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_676_cast_fp16, y = var_683_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_665_cast_fp16)[name = string("transpose_68")]; tensor var_686_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_686_cast_fp16")]; tensor var_687_split_sizes_0 = const()[name = string("op_687_split_sizes_0"), val = tensor([32, 32])]; int32 var_687_axis_0 = const()[name = string("op_687_axis_0"), val = int32(-1)]; tensor var_687_cast_fp16_0, tensor var_687_cast_fp16_1 = split(axis = var_687_axis_0, split_sizes = var_687_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_687_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_689_cast_fp16 = mul(x = var_687_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_689_cast_fp16")]; int32 var_691 = const()[name = string("op_691"), val = int32(-1)]; bool var_692_interleave_0 = const()[name = string("op_692_interleave_0"), val = bool(false)]; tensor var_692_cast_fp16 = concat(axis = var_691, interleave = var_692_interleave_0, values = (var_689_cast_fp16, var_687_cast_fp16_0))[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = mul(x = var_692_cast_fp16, y = sin_cast_fp16)[name = string("op_693_cast_fp16")]; tensor key_states_7_cast_fp16 = add(x = var_686_cast_fp16, y = var_693_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_426, concat_12_values3_0))[name = string("concat_12")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_312_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_312")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_672_cast_fp16)[name = string("transpose_67")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_313_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_313")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_50)[name = string("op_736_cast_fp16")]; tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; int32 var_739_axis_0 = const()[name = string("op_739_axis_0"), val = int32(1)]; tensor var_739_cast_fp16_0, tensor var_739_cast_fp16_1 = split(axis = var_739_axis_0, split_sizes = tile_2, x = var_736_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = coreml_update_state_51)[name = string("op_746_cast_fp16")]; tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = tile_3, x = var_746_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_752_split_sizes_0 = const()[name = string("op_752_split_sizes_0"), val = tensor([8, 8])]; int32 var_752_axis_0 = const()[name = string("op_752_axis_0"), val = int32(1)]; tensor var_752_cast_fp16_0, tensor var_752_cast_fp16_1 = split(axis = var_752_axis_0, split_sizes = var_752_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_752_cast_fp16")]; bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_739_cast_fp16_0, y = var_752_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; int32 var_759 = const()[name = string("op_759"), val = int32(2)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_759, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool var_765_transpose_x_1 = const()[name = string("op_765_transpose_x_1"), val = bool(true)]; bool var_765_transpose_y_1 = const()[name = string("op_765_transpose_y_1"), val = bool(false)]; tensor var_765_cast_fp16 = matmul(transpose_x = var_765_transpose_x_1, transpose_y = var_765_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_749_cast_fp16_0)[name = string("op_765_cast_fp16")]; bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_739_cast_fp16_1, y = var_752_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; int32 var_771 = const()[name = string("op_771"), val = int32(2)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_771, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_749_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; int32 var_779 = const()[name = string("op_779"), val = int32(1)]; bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = concat(axis = var_779, interleave = attn_output_9_interleave_0, values = (var_765_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; tensor var_783_perm_0 = const()[name = string("op_783_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1024, 1, 32])]; tensor var_783_cast_fp16 = transpose(perm = var_783_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_66")]; tensor x_23_cast_fp16 = reshape(shape = var_788, x = var_783_cast_fp16)[name = string("x_23_cast_fp16")]; string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_795_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_807 = const()[name = string("op_807"), val = int32(1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; tensor x_27_cast_fp16 = concat(axis = var_807, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_810_cast_fp16))[name = string("x_27_cast_fp16")]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_820_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_826_split_sizes_0 = const()[name = string("op_826_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_826_axis_0 = const()[name = string("op_826_axis_0"), val = int32(1)]; tensor var_826_cast_fp16_0, tensor var_826_cast_fp16_1 = split(axis = var_826_axis_0, split_sizes = var_826_split_sizes_0, x = out_21_cast_fp16)[name = string("op_826_cast_fp16")]; string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_831_to_fp16, x = var_826_cast_fp16_0)[name = string("input_3_cast_fp16")]; tensor var_842_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_842_cast_fp16")]; string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")]; tensor var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor([1, 1])]; tensor var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor([1, 1])]; int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)]; tensor var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; tensor var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = var_830_to_fp16, x = var_826_cast_fp16_0)[name = string("op_847_cast_fp16")]; tensor x_33_cast_fp16 = mul(x = var_842_cast_fp16, y = var_847_cast_fp16)[name = string("x_33_cast_fp16")]; string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_829_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; int32 var_860 = const()[name = string("op_860"), val = int32(1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_863_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_863_cast_fp16")]; bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; tensor x_37_cast_fp16 = concat(axis = var_860, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_863_cast_fp16))[name = string("x_37_cast_fp16")]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_873_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(1)]; tensor var_879_cast_fp16_0, tensor var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = out_27_cast_fp16)[name = string("op_879_cast_fp16")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_901_to_fp16, x = var_879_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_912_to_fp16, x = var_879_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_923_to_fp16, x = var_879_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 16, 64, 32])]; tensor embed_9_cast_fp16 = reshape(shape = var_931, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; tensor var_935 = const()[name = string("op_935"), val = tensor([1, 2, 64, 32])]; tensor var_936_cast_fp16 = reshape(shape = var_935, x = key_states_9_cast_fp16)[name = string("op_936_cast_fp16")]; tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 2, 64, 32])]; tensor var_943_cast_fp16 = reshape(shape = var_942, x = value_states_9_cast_fp16)[name = string("op_943_cast_fp16")]; tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_947_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_947_cast_fp16")]; tensor var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor([32, 32])]; int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-2)]; tensor var_948_cast_fp16_0, tensor var_948_cast_fp16_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_948_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_950_cast_fp16 = mul(x = var_948_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_950_cast_fp16")]; int32 var_952 = const()[name = string("op_952"), val = int32(-2)]; bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)]; tensor var_953_cast_fp16 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950_cast_fp16, var_948_cast_fp16_0))[name = string("op_953_cast_fp16")]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = sin_1_cast_fp16)[name = string("op_954_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_936_cast_fp16)[name = string("transpose_65")]; tensor var_957_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_957_cast_fp16")]; tensor var_958_split_sizes_0 = const()[name = string("op_958_split_sizes_0"), val = tensor([32, 32])]; int32 var_958_axis_0 = const()[name = string("op_958_axis_0"), val = int32(-1)]; tensor var_958_cast_fp16_0, tensor var_958_cast_fp16_1 = split(axis = var_958_axis_0, split_sizes = var_958_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_958_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = var_958_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool var_963_interleave_0 = const()[name = string("op_963_interleave_0"), val = bool(false)]; tensor var_963_cast_fp16 = concat(axis = var_962, interleave = var_963_interleave_0, values = (var_960_cast_fp16, var_958_cast_fp16_0))[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = var_963_cast_fp16, y = sin_cast_fp16)[name = string("op_964_cast_fp16")]; tensor key_states_11_cast_fp16 = add(x = var_957_cast_fp16, y = var_964_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_426, concat_20_values3_0))[name = string("concat_20")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_314_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_314")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_943_cast_fp16)[name = string("transpose_64")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_315_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_315")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_52)[name = string("op_1007_cast_fp16")]; tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; int32 var_1010_axis_0 = const()[name = string("op_1010_axis_0"), val = int32(1)]; tensor var_1010_cast_fp16_0, tensor var_1010_cast_fp16_1 = split(axis = var_1010_axis_0, split_sizes = tile_4, x = var_1007_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = string("op_1017_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1017_end_0 = const()[name = string("op_1017_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1017_end_mask_0 = const()[name = string("op_1017_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = coreml_update_state_53)[name = string("op_1017_cast_fp16")]; tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; int32 var_1020_axis_0 = const()[name = string("op_1020_axis_0"), val = int32(1)]; tensor var_1020_cast_fp16_0, tensor var_1020_cast_fp16_1 = split(axis = var_1020_axis_0, split_sizes = tile_5, x = var_1017_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1023_split_sizes_0 = const()[name = string("op_1023_split_sizes_0"), val = tensor([8, 8])]; int32 var_1023_axis_0 = const()[name = string("op_1023_axis_0"), val = int32(1)]; tensor var_1023_cast_fp16_0, tensor var_1023_cast_fp16_1 = split(axis = var_1023_axis_0, split_sizes = var_1023_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_1023_cast_fp16")]; bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_1010_cast_fp16_0, y = var_1023_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; int32 var_1030 = const()[name = string("op_1030"), val = int32(2)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_1030, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool var_1036_transpose_x_1 = const()[name = string("op_1036_transpose_x_1"), val = bool(true)]; bool var_1036_transpose_y_1 = const()[name = string("op_1036_transpose_y_1"), val = bool(false)]; tensor var_1036_cast_fp16 = matmul(transpose_x = var_1036_transpose_x_1, transpose_y = var_1036_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_1020_cast_fp16_0)[name = string("op_1036_cast_fp16")]; bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_1010_cast_fp16_1, y = var_1023_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; int32 var_1042 = const()[name = string("op_1042"), val = int32(2)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_1042, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_1020_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; int32 var_1050 = const()[name = string("op_1050"), val = int32(1)]; bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; tensor attn_output_15_cast_fp16 = concat(axis = var_1050, interleave = attn_output_15_interleave_0, values = (var_1036_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; tensor var_1054_perm_0 = const()[name = string("op_1054_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1059 = const()[name = string("op_1059"), val = tensor([1, 1024, 1, 32])]; tensor var_1054_cast_fp16 = transpose(perm = var_1054_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_63")]; tensor x_41_cast_fp16 = reshape(shape = var_1059, x = var_1054_cast_fp16)[name = string("x_41_cast_fp16")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_1066_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_1078 = const()[name = string("op_1078"), val = int32(1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1081_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_1081_cast_fp16")]; bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; tensor x_45_cast_fp16 = concat(axis = var_1078, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_1081_cast_fp16))[name = string("x_45_cast_fp16")]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1091_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(1)]; tensor var_1097_cast_fp16_0, tensor var_1097_cast_fp16_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = out_33_cast_fp16)[name = string("op_1097_cast_fp16")]; string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_1102_to_fp16, x = var_1097_cast_fp16_0)[name = string("input_5_cast_fp16")]; tensor var_1113_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_1113_cast_fp16")]; string var_1118_pad_type_0 = const()[name = string("op_1118_pad_type_0"), val = string("valid")]; tensor var_1118_strides_0 = const()[name = string("op_1118_strides_0"), val = tensor([1, 1])]; tensor var_1118_pad_0 = const()[name = string("op_1118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1118_dilations_0 = const()[name = string("op_1118_dilations_0"), val = tensor([1, 1])]; int32 var_1118_groups_0 = const()[name = string("op_1118_groups_0"), val = int32(1)]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; tensor var_1118_cast_fp16 = conv(dilations = var_1118_dilations_0, groups = var_1118_groups_0, pad = var_1118_pad_0, pad_type = var_1118_pad_type_0, strides = var_1118_strides_0, weight = var_1101_to_fp16, x = var_1097_cast_fp16_0)[name = string("op_1118_cast_fp16")]; tensor x_51_cast_fp16 = mul(x = var_1113_cast_fp16, y = var_1118_cast_fp16)[name = string("x_51_cast_fp16")]; string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; tensor var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_1100_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(1)]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1134_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1134_cast_fp16")]; bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; tensor x_55_cast_fp16 = concat(axis = var_1131, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_1134_cast_fp16))[name = string("x_55_cast_fp16")]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1144_to_fp16 = const()[name = string("op_1144_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1144_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_1150_split_sizes_0 = const()[name = string("op_1150_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1150_axis_0 = const()[name = string("op_1150_axis_0"), val = int32(1)]; tensor var_1150_cast_fp16_0, tensor var_1150_cast_fp16_1 = split(axis = var_1150_axis_0, split_sizes = var_1150_split_sizes_0, x = out_39_cast_fp16)[name = string("op_1150_cast_fp16")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_1172_to_fp16, x = var_1150_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1183_to_fp16, x = var_1150_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1194_to_fp16, x = var_1150_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 16, 64, 32])]; tensor embed_13_cast_fp16 = reshape(shape = var_1202, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 2, 64, 32])]; tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = key_states_13_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, 2, 64, 32])]; tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = value_states_13_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1218_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1218_cast_fp16")]; tensor var_1219_split_sizes_0 = const()[name = string("op_1219_split_sizes_0"), val = tensor([32, 32])]; int32 var_1219_axis_0 = const()[name = string("op_1219_axis_0"), val = int32(-2)]; tensor var_1219_cast_fp16_0, tensor var_1219_cast_fp16_1 = split(axis = var_1219_axis_0, split_sizes = var_1219_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1219_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1221_cast_fp16 = mul(x = var_1219_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1221_cast_fp16")]; int32 var_1223 = const()[name = string("op_1223"), val = int32(-2)]; bool var_1224_interleave_0 = const()[name = string("op_1224_interleave_0"), val = bool(false)]; tensor var_1224_cast_fp16 = concat(axis = var_1223, interleave = var_1224_interleave_0, values = (var_1221_cast_fp16, var_1219_cast_fp16_0))[name = string("op_1224_cast_fp16")]; tensor var_1225_cast_fp16 = mul(x = var_1224_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1225_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1207_cast_fp16)[name = string("transpose_62")]; tensor var_1228_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor var_1229_split_sizes_0 = const()[name = string("op_1229_split_sizes_0"), val = tensor([32, 32])]; int32 var_1229_axis_0 = const()[name = string("op_1229_axis_0"), val = int32(-1)]; tensor var_1229_cast_fp16_0, tensor var_1229_cast_fp16_1 = split(axis = var_1229_axis_0, split_sizes = var_1229_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1229_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1231_cast_fp16 = mul(x = var_1229_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1231_cast_fp16")]; int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; bool var_1234_interleave_0 = const()[name = string("op_1234_interleave_0"), val = bool(false)]; tensor var_1234_cast_fp16 = concat(axis = var_1233, interleave = var_1234_interleave_0, values = (var_1231_cast_fp16, var_1229_cast_fp16_0))[name = string("op_1234_cast_fp16")]; tensor var_1235_cast_fp16 = mul(x = var_1234_cast_fp16, y = sin_cast_fp16)[name = string("op_1235_cast_fp16")]; tensor key_states_15_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1235_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_426, concat_28_values3_0))[name = string("concat_28")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_316_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_316")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1214_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_317_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_317")]; tensor var_1278_begin_0 = const()[name = string("op_1278_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1278_end_0 = const()[name = string("op_1278_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1278_end_mask_0 = const()[name = string("op_1278_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1278_cast_fp16 = slice_by_index(begin = var_1278_begin_0, end = var_1278_end_0, end_mask = var_1278_end_mask_0, x = coreml_update_state_54)[name = string("op_1278_cast_fp16")]; tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; int32 var_1281_axis_0 = const()[name = string("op_1281_axis_0"), val = int32(1)]; tensor var_1281_cast_fp16_0, tensor var_1281_cast_fp16_1 = split(axis = var_1281_axis_0, split_sizes = tile_6, x = var_1278_cast_fp16)[name = string("op_1281_cast_fp16")]; tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = coreml_update_state_55)[name = string("op_1288_cast_fp16")]; tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; int32 var_1291_axis_0 = const()[name = string("op_1291_axis_0"), val = int32(1)]; tensor var_1291_cast_fp16_0, tensor var_1291_cast_fp16_1 = split(axis = var_1291_axis_0, split_sizes = tile_7, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_split_sizes_0 = const()[name = string("op_1294_split_sizes_0"), val = tensor([8, 8])]; int32 var_1294_axis_0 = const()[name = string("op_1294_axis_0"), val = int32(1)]; tensor var_1294_cast_fp16_0, tensor var_1294_cast_fp16_1 = split(axis = var_1294_axis_0, split_sizes = var_1294_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1294_cast_fp16")]; bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1281_cast_fp16_0, y = var_1294_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; int32 var_1301 = const()[name = string("op_1301"), val = int32(2)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_1301, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool var_1307_transpose_x_1 = const()[name = string("op_1307_transpose_x_1"), val = bool(true)]; bool var_1307_transpose_y_1 = const()[name = string("op_1307_transpose_y_1"), val = bool(false)]; tensor var_1307_cast_fp16 = matmul(transpose_x = var_1307_transpose_x_1, transpose_y = var_1307_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1291_cast_fp16_0)[name = string("op_1307_cast_fp16")]; bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1281_cast_fp16_1, y = var_1294_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; int32 var_1313 = const()[name = string("op_1313"), val = int32(2)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_1313, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1291_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; int32 var_1321 = const()[name = string("op_1321"), val = int32(1)]; bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = concat(axis = var_1321, interleave = attn_output_21_interleave_0, values = (var_1307_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; tensor var_1325_perm_0 = const()[name = string("op_1325_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 1024, 1, 32])]; tensor var_1325_cast_fp16 = transpose(perm = var_1325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_60")]; tensor x_59_cast_fp16 = reshape(shape = var_1330, x = var_1325_cast_fp16)[name = string("x_59_cast_fp16")]; string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1337_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; int32 var_1349 = const()[name = string("op_1349"), val = int32(1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1352_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1352_cast_fp16")]; bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; tensor x_63_cast_fp16 = concat(axis = var_1349, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1352_cast_fp16))[name = string("x_63_cast_fp16")]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1362_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1368_split_sizes_0 = const()[name = string("op_1368_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1368_axis_0 = const()[name = string("op_1368_axis_0"), val = int32(1)]; tensor var_1368_cast_fp16_0, tensor var_1368_cast_fp16_1 = split(axis = var_1368_axis_0, split_sizes = var_1368_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1368_cast_fp16")]; string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; tensor var_1373_to_fp16 = const()[name = string("op_1373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1373_to_fp16, x = var_1368_cast_fp16_0)[name = string("input_7_cast_fp16")]; tensor var_1384_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1384_cast_fp16")]; string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")]; tensor var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor([1, 1])]; tensor var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor([1, 1])]; int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)]; tensor var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; tensor var_1389_cast_fp16 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = var_1372_to_fp16, x = var_1368_cast_fp16_0)[name = string("op_1389_cast_fp16")]; tensor x_69_cast_fp16 = mul(x = var_1384_cast_fp16, y = var_1389_cast_fp16)[name = string("x_69_cast_fp16")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor var_1371_to_fp16 = const()[name = string("op_1371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1371_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; int32 var_1402 = const()[name = string("op_1402"), val = int32(1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1405_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1405_cast_fp16")]; bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; tensor x_73_cast_fp16 = concat(axis = var_1402, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1405_cast_fp16))[name = string("x_73_cast_fp16")]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1415_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_1421_split_sizes_0 = const()[name = string("op_1421_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1421_axis_0 = const()[name = string("op_1421_axis_0"), val = int32(1)]; tensor var_1421_cast_fp16_0, tensor var_1421_cast_fp16_1 = split(axis = var_1421_axis_0, split_sizes = var_1421_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1421_cast_fp16")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1443_to_fp16, x = var_1421_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; tensor var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1454_to_fp16, x = var_1421_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1465_to_fp16, x = var_1421_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; tensor var_1473 = const()[name = string("op_1473"), val = tensor([1, 16, 64, 32])]; tensor embed_17_cast_fp16 = reshape(shape = var_1473, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; tensor var_1477 = const()[name = string("op_1477"), val = tensor([1, 2, 64, 32])]; tensor var_1478_cast_fp16 = reshape(shape = var_1477, x = key_states_17_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1484 = const()[name = string("op_1484"), val = tensor([1, 2, 64, 32])]; tensor var_1485_cast_fp16 = reshape(shape = var_1484, x = value_states_17_cast_fp16)[name = string("op_1485_cast_fp16")]; tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1489_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor var_1490_split_sizes_0 = const()[name = string("op_1490_split_sizes_0"), val = tensor([32, 32])]; int32 var_1490_axis_0 = const()[name = string("op_1490_axis_0"), val = int32(-2)]; tensor var_1490_cast_fp16_0, tensor var_1490_cast_fp16_1 = split(axis = var_1490_axis_0, split_sizes = var_1490_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1490_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1492_cast_fp16 = mul(x = var_1490_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-2)]; bool var_1495_interleave_0 = const()[name = string("op_1495_interleave_0"), val = bool(false)]; tensor var_1495_cast_fp16 = concat(axis = var_1494, interleave = var_1495_interleave_0, values = (var_1492_cast_fp16, var_1490_cast_fp16_0))[name = string("op_1495_cast_fp16")]; tensor var_1496_cast_fp16 = mul(x = var_1495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1496_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1478_cast_fp16)[name = string("transpose_59")]; tensor var_1499_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_split_sizes_0 = const()[name = string("op_1500_split_sizes_0"), val = tensor([32, 32])]; int32 var_1500_axis_0 = const()[name = string("op_1500_axis_0"), val = int32(-1)]; tensor var_1500_cast_fp16_0, tensor var_1500_cast_fp16_1 = split(axis = var_1500_axis_0, split_sizes = var_1500_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1500_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1502_cast_fp16 = mul(x = var_1500_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1502_cast_fp16")]; int32 var_1504 = const()[name = string("op_1504"), val = int32(-1)]; bool var_1505_interleave_0 = const()[name = string("op_1505_interleave_0"), val = bool(false)]; tensor var_1505_cast_fp16 = concat(axis = var_1504, interleave = var_1505_interleave_0, values = (var_1502_cast_fp16, var_1500_cast_fp16_0))[name = string("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = mul(x = var_1505_cast_fp16, y = sin_cast_fp16)[name = string("op_1506_cast_fp16")]; tensor key_states_19_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1506_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_426, concat_36_values3_0))[name = string("concat_36")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_318_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_318")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1485_cast_fp16)[name = string("transpose_58")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_319_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_319")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1549_end_0 = const()[name = string("op_1549_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = coreml_update_state_56)[name = string("op_1549_cast_fp16")]; tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; int32 var_1552_axis_0 = const()[name = string("op_1552_axis_0"), val = int32(1)]; tensor var_1552_cast_fp16_0, tensor var_1552_cast_fp16_1 = split(axis = var_1552_axis_0, split_sizes = tile_8, x = var_1549_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = string("op_1559_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, x = coreml_update_state_57)[name = string("op_1559_cast_fp16")]; tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(1)]; tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = tile_9, x = var_1559_cast_fp16)[name = string("op_1562_cast_fp16")]; tensor var_1565_split_sizes_0 = const()[name = string("op_1565_split_sizes_0"), val = tensor([8, 8])]; int32 var_1565_axis_0 = const()[name = string("op_1565_axis_0"), val = int32(1)]; tensor var_1565_cast_fp16_0, tensor var_1565_cast_fp16_1 = split(axis = var_1565_axis_0, split_sizes = var_1565_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1565_cast_fp16")]; bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1552_cast_fp16_0, y = var_1565_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; int32 var_1572 = const()[name = string("op_1572"), val = int32(2)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_1572, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(true)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(false)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1562_cast_fp16_0)[name = string("op_1578_cast_fp16")]; bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1552_cast_fp16_1, y = var_1565_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; int32 var_1584 = const()[name = string("op_1584"), val = int32(2)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_1584, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1562_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; int32 var_1592 = const()[name = string("op_1592"), val = int32(1)]; bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; tensor attn_output_27_cast_fp16 = concat(axis = var_1592, interleave = attn_output_27_interleave_0, values = (var_1578_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1024, 1, 32])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_57")]; tensor x_77_cast_fp16 = reshape(shape = var_1601, x = var_1596_cast_fp16)[name = string("x_77_cast_fp16")]; string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1608_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; int32 var_1620 = const()[name = string("op_1620"), val = int32(1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1623_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1623_cast_fp16")]; bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; tensor x_81_cast_fp16 = concat(axis = var_1620, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1623_cast_fp16))[name = string("x_81_cast_fp16")]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1633_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1639_split_sizes_0 = const()[name = string("op_1639_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1639_axis_0 = const()[name = string("op_1639_axis_0"), val = int32(1)]; tensor var_1639_cast_fp16_0, tensor var_1639_cast_fp16_1 = split(axis = var_1639_axis_0, split_sizes = var_1639_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1639_cast_fp16")]; string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1644_to_fp16, x = var_1639_cast_fp16_0)[name = string("input_9_cast_fp16")]; tensor var_1655_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1655_cast_fp16")]; string var_1660_pad_type_0 = const()[name = string("op_1660_pad_type_0"), val = string("valid")]; tensor var_1660_strides_0 = const()[name = string("op_1660_strides_0"), val = tensor([1, 1])]; tensor var_1660_pad_0 = const()[name = string("op_1660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1660_dilations_0 = const()[name = string("op_1660_dilations_0"), val = tensor([1, 1])]; int32 var_1660_groups_0 = const()[name = string("op_1660_groups_0"), val = int32(1)]; tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; tensor var_1660_cast_fp16 = conv(dilations = var_1660_dilations_0, groups = var_1660_groups_0, pad = var_1660_pad_0, pad_type = var_1660_pad_type_0, strides = var_1660_strides_0, weight = var_1643_to_fp16, x = var_1639_cast_fp16_0)[name = string("op_1660_cast_fp16")]; tensor x_87_cast_fp16 = mul(x = var_1655_cast_fp16, y = var_1660_cast_fp16)[name = string("x_87_cast_fp16")]; string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; tensor var_1642_to_fp16 = const()[name = string("op_1642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1642_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; int32 var_1673 = const()[name = string("op_1673"), val = int32(1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1676_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1676_cast_fp16")]; bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; tensor x_91_cast_fp16 = concat(axis = var_1673, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1676_cast_fp16))[name = string("x_91_cast_fp16")]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1686_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_1692_split_sizes_0 = const()[name = string("op_1692_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1692_axis_0 = const()[name = string("op_1692_axis_0"), val = int32(1)]; tensor var_1692_cast_fp16_0, tensor var_1692_cast_fp16_1 = split(axis = var_1692_axis_0, split_sizes = var_1692_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1692_cast_fp16")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor var_1714_to_fp16 = const()[name = string("op_1714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1714_to_fp16, x = var_1692_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1725_to_fp16, x = var_1692_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; tensor var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1736_to_fp16, x = var_1692_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; tensor var_1744 = const()[name = string("op_1744"), val = tensor([1, 16, 64, 32])]; tensor embed_21_cast_fp16 = reshape(shape = var_1744, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 2, 64, 32])]; tensor var_1749_cast_fp16 = reshape(shape = var_1748, x = key_states_21_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor embed_23_perm_0 = const()[name = string("embed_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 2, 64, 32])]; tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_states_21_cast_fp16)[name = string("op_1756_cast_fp16")]; tensor value_states_23_perm_0 = const()[name = string("value_states_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1760_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([32, 32])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-2)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1761_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1763_cast_fp16 = mul(x = var_1761_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1763_cast_fp16")]; int32 var_1765 = const()[name = string("op_1765"), val = int32(-2)]; bool var_1766_interleave_0 = const()[name = string("op_1766_interleave_0"), val = bool(false)]; tensor var_1766_cast_fp16 = concat(axis = var_1765, interleave = var_1766_interleave_0, values = (var_1763_cast_fp16, var_1761_cast_fp16_0))[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = var_1766_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1760_cast_fp16, y = var_1767_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor embed_23_cast_fp16 = transpose(perm = embed_23_perm_0, x = var_1749_cast_fp16)[name = string("transpose_56")]; tensor var_1770_cast_fp16 = mul(x = embed_23_cast_fp16, y = cos_cast_fp16)[name = string("op_1770_cast_fp16")]; tensor var_1771_split_sizes_0 = const()[name = string("op_1771_split_sizes_0"), val = tensor([32, 32])]; int32 var_1771_axis_0 = const()[name = string("op_1771_axis_0"), val = int32(-1)]; tensor var_1771_cast_fp16_0, tensor var_1771_cast_fp16_1 = split(axis = var_1771_axis_0, split_sizes = var_1771_split_sizes_0, x = embed_23_cast_fp16)[name = string("op_1771_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1773_cast_fp16 = mul(x = var_1771_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1773_cast_fp16")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool var_1776_interleave_0 = const()[name = string("op_1776_interleave_0"), val = bool(false)]; tensor var_1776_cast_fp16 = concat(axis = var_1775, interleave = var_1776_interleave_0, values = (var_1773_cast_fp16, var_1771_cast_fp16_0))[name = string("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = mul(x = var_1776_cast_fp16, y = sin_cast_fp16)[name = string("op_1777_cast_fp16")]; tensor key_states_23_cast_fp16 = add(x = var_1770_cast_fp16, y = var_1777_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_426, concat_44_values3_0))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_320_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_320")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_23_cast_fp16 = transpose(perm = value_states_23_perm_0, x = var_1756_cast_fp16)[name = string("transpose_55")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_23_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_321_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_321")]; tensor var_1820_begin_0 = const()[name = string("op_1820_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1820_end_0 = const()[name = string("op_1820_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1820_end_mask_0 = const()[name = string("op_1820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1820_cast_fp16 = slice_by_index(begin = var_1820_begin_0, end = var_1820_end_0, end_mask = var_1820_end_mask_0, x = coreml_update_state_58)[name = string("op_1820_cast_fp16")]; tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; int32 var_1823_axis_0 = const()[name = string("op_1823_axis_0"), val = int32(1)]; tensor var_1823_cast_fp16_0, tensor var_1823_cast_fp16_1 = split(axis = var_1823_axis_0, split_sizes = tile_10, x = var_1820_cast_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_59)[name = string("op_1830_cast_fp16")]; tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; int32 var_1833_axis_0 = const()[name = string("op_1833_axis_0"), val = int32(1)]; tensor var_1833_cast_fp16_0, tensor var_1833_cast_fp16_1 = split(axis = var_1833_axis_0, split_sizes = tile_11, x = var_1830_cast_fp16)[name = string("op_1833_cast_fp16")]; tensor var_1836_split_sizes_0 = const()[name = string("op_1836_split_sizes_0"), val = tensor([8, 8])]; int32 var_1836_axis_0 = const()[name = string("op_1836_axis_0"), val = int32(1)]; tensor var_1836_cast_fp16_0, tensor var_1836_cast_fp16_1 = split(axis = var_1836_axis_0, split_sizes = var_1836_split_sizes_0, x = query_states_23_cast_fp16)[name = string("op_1836_cast_fp16")]; bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1823_cast_fp16_0, y = var_1836_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; int32 var_1843 = const()[name = string("op_1843"), val = int32(2)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_1843, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool var_1849_transpose_x_1 = const()[name = string("op_1849_transpose_x_1"), val = bool(true)]; bool var_1849_transpose_y_1 = const()[name = string("op_1849_transpose_y_1"), val = bool(false)]; tensor var_1849_cast_fp16 = matmul(transpose_x = var_1849_transpose_x_1, transpose_y = var_1849_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1833_cast_fp16_0)[name = string("op_1849_cast_fp16")]; bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1823_cast_fp16_1, y = var_1836_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; int32 var_1855 = const()[name = string("op_1855"), val = int32(2)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_1855, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_95_cast_fp16, y = var_1833_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; int32 var_1863 = const()[name = string("op_1863"), val = int32(1)]; bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = concat(axis = var_1863, interleave = attn_output_33_interleave_0, values = (var_1849_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; tensor var_1867_perm_0 = const()[name = string("op_1867_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, 1024, 1, 32])]; tensor var_1867_cast_fp16 = transpose(perm = var_1867_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_54")]; tensor x_95_cast_fp16 = reshape(shape = var_1872, x = var_1867_cast_fp16)[name = string("x_95_cast_fp16")]; string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; tensor var_1879_to_fp16 = const()[name = string("op_1879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1879_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_1891 = const()[name = string("op_1891"), val = int32(1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; tensor x_99_cast_fp16 = concat(axis = var_1891, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1894_cast_fp16))[name = string("x_99_cast_fp16")]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1904_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1910_split_sizes_0 = const()[name = string("op_1910_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1910_axis_0 = const()[name = string("op_1910_axis_0"), val = int32(1)]; tensor var_1910_cast_fp16_0, tensor var_1910_cast_fp16_1 = split(axis = var_1910_axis_0, split_sizes = var_1910_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1910_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; tensor input_11_cast_fp16 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = var_1915_to_fp16, x = var_1910_cast_fp16_0)[name = string("input_11_cast_fp16")]; tensor var_1926_cast_fp16 = silu(x = input_11_cast_fp16)[name = string("op_1926_cast_fp16")]; string var_1931_pad_type_0 = const()[name = string("op_1931_pad_type_0"), val = string("valid")]; tensor var_1931_strides_0 = const()[name = string("op_1931_strides_0"), val = tensor([1, 1])]; tensor var_1931_pad_0 = const()[name = string("op_1931_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1931_dilations_0 = const()[name = string("op_1931_dilations_0"), val = tensor([1, 1])]; int32 var_1931_groups_0 = const()[name = string("op_1931_groups_0"), val = int32(1)]; tensor var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; tensor var_1931_cast_fp16 = conv(dilations = var_1931_dilations_0, groups = var_1931_groups_0, pad = var_1931_pad_0, pad_type = var_1931_pad_type_0, strides = var_1931_strides_0, weight = var_1914_to_fp16, x = var_1910_cast_fp16_0)[name = string("op_1931_cast_fp16")]; tensor x_105_cast_fp16 = mul(x = var_1926_cast_fp16, y = var_1931_cast_fp16)[name = string("x_105_cast_fp16")]; string hidden_states_35_pad_type_0 = const()[name = string("hidden_states_35_pad_type_0"), val = string("valid")]; tensor hidden_states_35_strides_0 = const()[name = string("hidden_states_35_strides_0"), val = tensor([1, 1])]; tensor hidden_states_35_pad_0 = const()[name = string("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_35_dilations_0 = const()[name = string("hidden_states_35_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_35_groups_0 = const()[name = string("hidden_states_35_groups_0"), val = int32(1)]; tensor var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; tensor hidden_states_35_cast_fp16 = conv(dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = var_1913_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1947_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1947_cast_fp16")]; bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; tensor x_109_cast_fp16 = concat(axis = var_1944, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1947_cast_fp16))[name = string("x_109_cast_fp16")]; tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; fp16 var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1957_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; tensor layer_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_layers_6_input_layernorm_weight_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_1963_split_sizes_0 = const()[name = string("op_1963_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1963_axis_0 = const()[name = string("op_1963_axis_0"), val = int32(1)]; tensor var_1963_cast_fp16_0, tensor var_1963_cast_fp16_1 = split(axis = var_1963_axis_0, split_sizes = var_1963_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1963_cast_fp16")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; tensor query_states_25_cast_fp16 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = var_1985_to_fp16, x = var_1963_cast_fp16_0)[name = string("query_states_25_cast_fp16")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor var_1996_to_fp16 = const()[name = string("op_1996_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189861696)))]; tensor key_states_25_cast_fp16 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = var_1996_to_fp16, x = var_1963_cast_fp16_0)[name = string("key_states_25_cast_fp16")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190123904)))]; tensor value_states_25_cast_fp16 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = var_2007_to_fp16, x = var_1963_cast_fp16_0)[name = string("value_states_25_cast_fp16")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 16, 64, 32])]; tensor embed_25_cast_fp16 = reshape(shape = var_2015, x = query_states_25_cast_fp16)[name = string("embed_25_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 2, 64, 32])]; tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = key_states_25_cast_fp16)[name = string("op_2020_cast_fp16")]; tensor embed_27_perm_0 = const()[name = string("embed_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2026 = const()[name = string("op_2026"), val = tensor([1, 2, 64, 32])]; tensor var_2027_cast_fp16 = reshape(shape = var_2026, x = value_states_25_cast_fp16)[name = string("op_2027_cast_fp16")]; tensor value_states_27_perm_0 = const()[name = string("value_states_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2031_cast_fp16 = mul(x = embed_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_split_sizes_0 = const()[name = string("op_2032_split_sizes_0"), val = tensor([32, 32])]; int32 var_2032_axis_0 = const()[name = string("op_2032_axis_0"), val = int32(-2)]; tensor var_2032_cast_fp16_0, tensor var_2032_cast_fp16_1 = split(axis = var_2032_axis_0, split_sizes = var_2032_split_sizes_0, x = embed_25_cast_fp16)[name = string("op_2032_cast_fp16")]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2034_cast_fp16 = mul(x = var_2032_cast_fp16_1, y = const_67_promoted_to_fp16)[name = string("op_2034_cast_fp16")]; int32 var_2036 = const()[name = string("op_2036"), val = int32(-2)]; bool var_2037_interleave_0 = const()[name = string("op_2037_interleave_0"), val = bool(false)]; tensor var_2037_cast_fp16 = concat(axis = var_2036, interleave = var_2037_interleave_0, values = (var_2034_cast_fp16, var_2032_cast_fp16_0))[name = string("op_2037_cast_fp16")]; tensor var_2038_cast_fp16 = mul(x = var_2037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2038_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor embed_27_cast_fp16 = transpose(perm = embed_27_perm_0, x = var_2020_cast_fp16)[name = string("transpose_53")]; tensor var_2041_cast_fp16 = mul(x = embed_27_cast_fp16, y = cos_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2042_split_sizes_0 = const()[name = string("op_2042_split_sizes_0"), val = tensor([32, 32])]; int32 var_2042_axis_0 = const()[name = string("op_2042_axis_0"), val = int32(-1)]; tensor var_2042_cast_fp16_0, tensor var_2042_cast_fp16_1 = split(axis = var_2042_axis_0, split_sizes = var_2042_split_sizes_0, x = embed_27_cast_fp16)[name = string("op_2042_cast_fp16")]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = var_2042_cast_fp16_1, y = const_68_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; bool var_2047_interleave_0 = const()[name = string("op_2047_interleave_0"), val = bool(false)]; tensor var_2047_cast_fp16 = concat(axis = var_2046, interleave = var_2047_interleave_0, values = (var_2044_cast_fp16, var_2042_cast_fp16_0))[name = string("op_2047_cast_fp16")]; tensor var_2048_cast_fp16 = mul(x = var_2047_cast_fp16, y = sin_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor key_states_27_cast_fp16 = add(x = var_2041_cast_fp16, y = var_2048_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([6])]; tensor expand_dims_62 = const()[name = string("expand_dims_62"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([7])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_61, expand_dims_62, position_id, concat_51_values3_0))[name = string("concat_51")]; tensor concat_52_values1_0 = const()[name = string("concat_52_values1_0"), val = tensor([0])]; tensor concat_52_values3_0 = const()[name = string("concat_52_values3_0"), val = tensor([0])]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (expand_dims_64, concat_52_values1_0, var_426, concat_52_values3_0))[name = string("concat_52")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = key_states_27_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_322_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_322")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27_cast_fp16 = transpose(perm = value_states_27_perm_0, x = var_2027_cast_fp16)[name = string("transpose_52")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = value_states_27_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_323_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_323")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = coreml_update_state_60)[name = string("op_2091_cast_fp16")]; tensor tile_12 = const()[name = string("tile_12"), val = tensor([1, 1])]; int32 var_2094_axis_0 = const()[name = string("op_2094_axis_0"), val = int32(1)]; tensor var_2094_cast_fp16_0, tensor var_2094_cast_fp16_1 = split(axis = var_2094_axis_0, split_sizes = tile_12, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = string("op_2101_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = string("op_2101_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2101_end_mask_0 = const()[name = string("op_2101_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = coreml_update_state_61)[name = string("op_2101_cast_fp16")]; tensor tile_13 = const()[name = string("tile_13"), val = tensor([1, 1])]; int32 var_2104_axis_0 = const()[name = string("op_2104_axis_0"), val = int32(1)]; tensor var_2104_cast_fp16_0, tensor var_2104_cast_fp16_1 = split(axis = var_2104_axis_0, split_sizes = tile_13, x = var_2101_cast_fp16)[name = string("op_2104_cast_fp16")]; tensor var_2107_split_sizes_0 = const()[name = string("op_2107_split_sizes_0"), val = tensor([8, 8])]; int32 var_2107_axis_0 = const()[name = string("op_2107_axis_0"), val = int32(1)]; tensor var_2107_cast_fp16_0, tensor var_2107_cast_fp16_1 = split(axis = var_2107_axis_0, split_sizes = var_2107_split_sizes_0, x = query_states_27_cast_fp16)[name = string("op_2107_cast_fp16")]; bool attn_weights_97_transpose_x_0 = const()[name = string("attn_weights_97_transpose_x_0"), val = bool(false)]; bool attn_weights_97_transpose_y_0 = const()[name = string("attn_weights_97_transpose_y_0"), val = bool(false)]; tensor attn_weights_97_cast_fp16 = matmul(transpose_x = attn_weights_97_transpose_x_0, transpose_y = attn_weights_97_transpose_y_0, x = var_2094_cast_fp16_0, y = var_2107_cast_fp16_0)[name = string("attn_weights_97_cast_fp16")]; fp16 _inversed_attn_weights_99_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_99_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_99_cast_fp16 = mul(x = attn_weights_97_cast_fp16, y = _inversed_attn_weights_99_y_0_to_fp16)[name = string("_inversed_attn_weights_99_cast_fp16")]; tensor attn_weights_101_cast_fp16 = add(x = _inversed_attn_weights_99_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; int32 var_2114 = const()[name = string("op_2114"), val = int32(2)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_2114, x = attn_weights_101_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool var_2120_transpose_x_1 = const()[name = string("op_2120_transpose_x_1"), val = bool(true)]; bool var_2120_transpose_y_1 = const()[name = string("op_2120_transpose_y_1"), val = bool(false)]; tensor var_2120_cast_fp16 = matmul(transpose_x = var_2120_transpose_x_1, transpose_y = var_2120_transpose_y_1, x = attn_weights_103_cast_fp16, y = var_2104_cast_fp16_0)[name = string("op_2120_cast_fp16")]; bool attn_weights_105_transpose_x_0 = const()[name = string("attn_weights_105_transpose_x_0"), val = bool(false)]; bool attn_weights_105_transpose_y_0 = const()[name = string("attn_weights_105_transpose_y_0"), val = bool(false)]; tensor attn_weights_105_cast_fp16 = matmul(transpose_x = attn_weights_105_transpose_x_0, transpose_y = attn_weights_105_transpose_y_0, x = var_2094_cast_fp16_1, y = var_2107_cast_fp16_1)[name = string("attn_weights_105_cast_fp16")]; fp16 _inversed_attn_weights_107_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_107_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_107_cast_fp16 = mul(x = attn_weights_105_cast_fp16, y = _inversed_attn_weights_107_y_0_to_fp16)[name = string("_inversed_attn_weights_107_cast_fp16")]; tensor attn_weights_109_cast_fp16 = add(x = _inversed_attn_weights_107_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_109_cast_fp16")]; int32 var_2126 = const()[name = string("op_2126"), val = int32(2)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_2126, x = attn_weights_109_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(true)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_111_cast_fp16, y = var_2104_cast_fp16_1)[name = string("attn_output_37_cast_fp16")]; int32 var_2134 = const()[name = string("op_2134"), val = int32(1)]; bool attn_output_39_interleave_0 = const()[name = string("attn_output_39_interleave_0"), val = bool(false)]; tensor attn_output_39_cast_fp16 = concat(axis = var_2134, interleave = attn_output_39_interleave_0, values = (var_2120_cast_fp16, attn_output_37_cast_fp16))[name = string("attn_output_39_cast_fp16")]; tensor var_2138_perm_0 = const()[name = string("op_2138_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1024, 1, 32])]; tensor var_2138_cast_fp16 = transpose(perm = var_2138_perm_0, x = attn_output_39_cast_fp16)[name = string("transpose_51")]; tensor x_113_cast_fp16 = reshape(shape = var_2143, x = var_2138_cast_fp16)[name = string("x_113_cast_fp16")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386112)))]; tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = var_2150_to_fp16, x = x_113_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor x_115_cast_fp16 = add(x = x_107_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("x_115_cast_fp16")]; int32 var_2162 = const()[name = string("op_2162"), val = int32(1)]; fp16 const_73_promoted_to_fp16 = const()[name = string("const_73_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2165_cast_fp16 = mul(x = x_115_cast_fp16, y = const_73_promoted_to_fp16)[name = string("op_2165_cast_fp16")]; bool x_117_interleave_0 = const()[name = string("x_117_interleave_0"), val = bool(false)]; tensor x_117_cast_fp16 = concat(axis = var_2162, interleave = x_117_interleave_0, values = (x_115_cast_fp16, var_2165_cast_fp16))[name = string("x_117_cast_fp16")]; tensor out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor([1])]; fp16 var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2175_to_fp16, x = x_117_cast_fp16)[name = string("out_79_cast_fp16")]; tensor layer_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192483328)))]; tensor out_81_cast_fp16 = mul(x = out_79_cast_fp16, y = layer_layers_6_post_attention_layernorm_weight_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_2181_split_sizes_0 = const()[name = string("op_2181_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2181_axis_0 = const()[name = string("op_2181_axis_0"), val = int32(1)]; tensor var_2181_cast_fp16_0, tensor var_2181_cast_fp16_1 = split(axis = var_2181_axis_0, split_sizes = var_2181_split_sizes_0, x = out_81_cast_fp16)[name = string("op_2181_cast_fp16")]; string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192487488)))]; tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = var_2186_to_fp16, x = var_2181_cast_fp16_0)[name = string("input_13_cast_fp16")]; tensor var_2197_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_2197_cast_fp16")]; string var_2202_pad_type_0 = const()[name = string("op_2202_pad_type_0"), val = string("valid")]; tensor var_2202_strides_0 = const()[name = string("op_2202_strides_0"), val = tensor([1, 1])]; tensor var_2202_pad_0 = const()[name = string("op_2202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2202_dilations_0 = const()[name = string("op_2202_dilations_0"), val = tensor([1, 1])]; int32 var_2202_groups_0 = const()[name = string("op_2202_groups_0"), val = int32(1)]; tensor var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200876160)))]; tensor var_2202_cast_fp16 = conv(dilations = var_2202_dilations_0, groups = var_2202_groups_0, pad = var_2202_pad_0, pad_type = var_2202_pad_type_0, strides = var_2202_strides_0, weight = var_2185_to_fp16, x = var_2181_cast_fp16_0)[name = string("op_2202_cast_fp16")]; tensor x_123_cast_fp16 = mul(x = var_2197_cast_fp16, y = var_2202_cast_fp16)[name = string("x_123_cast_fp16")]; string hidden_states_41_pad_type_0 = const()[name = string("hidden_states_41_pad_type_0"), val = string("valid")]; tensor hidden_states_41_strides_0 = const()[name = string("hidden_states_41_strides_0"), val = tensor([1, 1])]; tensor hidden_states_41_pad_0 = const()[name = string("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_41_dilations_0 = const()[name = string("hidden_states_41_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_41_groups_0 = const()[name = string("hidden_states_41_groups_0"), val = int32(1)]; tensor var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209264832)))]; tensor hidden_states_41_cast_fp16 = conv(dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = var_2184_to_fp16, x = x_123_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor x_125_cast_fp16 = add(x = x_115_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("x_125_cast_fp16")]; int32 var_2215 = const()[name = string("op_2215"), val = int32(1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_125_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool x_127_interleave_0 = const()[name = string("x_127_interleave_0"), val = bool(false)]; tensor x_127_cast_fp16 = concat(axis = var_2215, interleave = x_127_interleave_0, values = (x_125_cast_fp16, var_2218_cast_fp16))[name = string("x_127_cast_fp16")]; tensor out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor([1])]; fp16 var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2228_to_fp16, x = x_127_cast_fp16)[name = string("out_85_cast_fp16")]; tensor layer_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217653504)))]; tensor out_87_cast_fp16 = mul(x = out_85_cast_fp16, y = layer_layers_7_input_layernorm_weight_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_2234_split_sizes_0 = const()[name = string("op_2234_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2234_axis_0 = const()[name = string("op_2234_axis_0"), val = int32(1)]; tensor var_2234_cast_fp16_0, tensor var_2234_cast_fp16_1 = split(axis = var_2234_axis_0, split_sizes = var_2234_split_sizes_0, x = out_87_cast_fp16)[name = string("op_2234_cast_fp16")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217657664)))]; tensor query_states_29_cast_fp16 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = var_2256_to_fp16, x = var_2234_cast_fp16_0)[name = string("query_states_29_cast_fp16")]; string key_states_29_pad_type_0 = const()[name = string("key_states_29_pad_type_0"), val = string("valid")]; tensor key_states_29_strides_0 = const()[name = string("key_states_29_strides_0"), val = tensor([1, 1])]; tensor key_states_29_pad_0 = const()[name = string("key_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_29_dilations_0 = const()[name = string("key_states_29_dilations_0"), val = tensor([1, 1])]; int32 key_states_29_groups_0 = const()[name = string("key_states_29_groups_0"), val = int32(1)]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754880)))]; tensor key_states_29_cast_fp16 = conv(dilations = key_states_29_dilations_0, groups = key_states_29_groups_0, pad = key_states_29_pad_0, pad_type = key_states_29_pad_type_0, strides = key_states_29_strides_0, weight = var_2267_to_fp16, x = var_2234_cast_fp16_0)[name = string("key_states_29_cast_fp16")]; string value_states_29_pad_type_0 = const()[name = string("value_states_29_pad_type_0"), val = string("valid")]; tensor value_states_29_strides_0 = const()[name = string("value_states_29_strides_0"), val = tensor([1, 1])]; tensor value_states_29_pad_0 = const()[name = string("value_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_29_dilations_0 = const()[name = string("value_states_29_dilations_0"), val = tensor([1, 1])]; int32 value_states_29_groups_0 = const()[name = string("value_states_29_groups_0"), val = int32(1)]; tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220017088)))]; tensor value_states_29_cast_fp16 = conv(dilations = value_states_29_dilations_0, groups = value_states_29_groups_0, pad = value_states_29_pad_0, pad_type = value_states_29_pad_type_0, strides = value_states_29_strides_0, weight = var_2278_to_fp16, x = var_2234_cast_fp16_0)[name = string("value_states_29_cast_fp16")]; tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 16, 64, 32])]; tensor embed_29_cast_fp16 = reshape(shape = var_2286, x = query_states_29_cast_fp16)[name = string("embed_29_cast_fp16")]; tensor var_2290 = const()[name = string("op_2290"), val = tensor([1, 2, 64, 32])]; tensor var_2291_cast_fp16 = reshape(shape = var_2290, x = key_states_29_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor embed_31_perm_0 = const()[name = string("embed_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([1, 2, 64, 32])]; tensor var_2298_cast_fp16 = reshape(shape = var_2297, x = value_states_29_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor value_states_31_perm_0 = const()[name = string("value_states_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_cast_fp16 = mul(x = embed_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2302_cast_fp16")]; tensor var_2303_split_sizes_0 = const()[name = string("op_2303_split_sizes_0"), val = tensor([32, 32])]; int32 var_2303_axis_0 = const()[name = string("op_2303_axis_0"), val = int32(-2)]; tensor var_2303_cast_fp16_0, tensor var_2303_cast_fp16_1 = split(axis = var_2303_axis_0, split_sizes = var_2303_split_sizes_0, x = embed_29_cast_fp16)[name = string("op_2303_cast_fp16")]; fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2305_cast_fp16 = mul(x = var_2303_cast_fp16_1, y = const_77_promoted_to_fp16)[name = string("op_2305_cast_fp16")]; int32 var_2307 = const()[name = string("op_2307"), val = int32(-2)]; bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; tensor var_2308_cast_fp16 = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (var_2305_cast_fp16, var_2303_cast_fp16_0))[name = string("op_2308_cast_fp16")]; tensor var_2309_cast_fp16 = mul(x = var_2308_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_2302_cast_fp16, y = var_2309_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor embed_31_cast_fp16 = transpose(perm = embed_31_perm_0, x = var_2291_cast_fp16)[name = string("transpose_50")]; tensor var_2312_cast_fp16 = mul(x = embed_31_cast_fp16, y = cos_cast_fp16)[name = string("op_2312_cast_fp16")]; tensor var_2313_split_sizes_0 = const()[name = string("op_2313_split_sizes_0"), val = tensor([32, 32])]; int32 var_2313_axis_0 = const()[name = string("op_2313_axis_0"), val = int32(-1)]; tensor var_2313_cast_fp16_0, tensor var_2313_cast_fp16_1 = split(axis = var_2313_axis_0, split_sizes = var_2313_split_sizes_0, x = embed_31_cast_fp16)[name = string("op_2313_cast_fp16")]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2315_cast_fp16 = mul(x = var_2313_cast_fp16_1, y = const_78_promoted_to_fp16)[name = string("op_2315_cast_fp16")]; int32 var_2317 = const()[name = string("op_2317"), val = int32(-1)]; bool var_2318_interleave_0 = const()[name = string("op_2318_interleave_0"), val = bool(false)]; tensor var_2318_cast_fp16 = concat(axis = var_2317, interleave = var_2318_interleave_0, values = (var_2315_cast_fp16, var_2313_cast_fp16_0))[name = string("op_2318_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = var_2318_cast_fp16, y = sin_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor key_states_31_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2319_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor expand_dims_71 = const()[name = string("expand_dims_71"), val = tensor([7])]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_74 = const()[name = string("expand_dims_74"), val = tensor([8])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_71, expand_dims_72, position_id, concat_59_values3_0))[name = string("concat_59")]; tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_74, concat_60_values1_0, var_426, concat_60_values3_0))[name = string("concat_60")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = key_states_31_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_324_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_324")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_31_cast_fp16 = transpose(perm = value_states_31_perm_0, x = var_2298_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = value_states_31_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_325_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_325")]; tensor var_2362_begin_0 = const()[name = string("op_2362_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2362_end_0 = const()[name = string("op_2362_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2362_end_mask_0 = const()[name = string("op_2362_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = coreml_update_state_62)[name = string("op_2362_cast_fp16")]; tensor tile_14 = const()[name = string("tile_14"), val = tensor([1, 1])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = tile_14, x = var_2362_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2372_begin_0 = const()[name = string("op_2372_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2372_end_0 = const()[name = string("op_2372_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2372_end_mask_0 = const()[name = string("op_2372_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2372_cast_fp16 = slice_by_index(begin = var_2372_begin_0, end = var_2372_end_0, end_mask = var_2372_end_mask_0, x = coreml_update_state_63)[name = string("op_2372_cast_fp16")]; tensor tile_15 = const()[name = string("tile_15"), val = tensor([1, 1])]; int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(1)]; tensor var_2375_cast_fp16_0, tensor var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = tile_15, x = var_2372_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2378_split_sizes_0 = const()[name = string("op_2378_split_sizes_0"), val = tensor([8, 8])]; int32 var_2378_axis_0 = const()[name = string("op_2378_axis_0"), val = int32(1)]; tensor var_2378_cast_fp16_0, tensor var_2378_cast_fp16_1 = split(axis = var_2378_axis_0, split_sizes = var_2378_split_sizes_0, x = query_states_31_cast_fp16)[name = string("op_2378_cast_fp16")]; bool attn_weights_113_transpose_x_0 = const()[name = string("attn_weights_113_transpose_x_0"), val = bool(false)]; bool attn_weights_113_transpose_y_0 = const()[name = string("attn_weights_113_transpose_y_0"), val = bool(false)]; tensor attn_weights_113_cast_fp16 = matmul(transpose_x = attn_weights_113_transpose_x_0, transpose_y = attn_weights_113_transpose_y_0, x = var_2365_cast_fp16_0, y = var_2378_cast_fp16_0)[name = string("attn_weights_113_cast_fp16")]; fp16 _inversed_attn_weights_115_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_115_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_115_cast_fp16 = mul(x = attn_weights_113_cast_fp16, y = _inversed_attn_weights_115_y_0_to_fp16)[name = string("_inversed_attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = _inversed_attn_weights_115_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; int32 var_2385 = const()[name = string("op_2385"), val = int32(2)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_2385, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool var_2391_transpose_x_1 = const()[name = string("op_2391_transpose_x_1"), val = bool(true)]; bool var_2391_transpose_y_1 = const()[name = string("op_2391_transpose_y_1"), val = bool(false)]; tensor var_2391_cast_fp16 = matmul(transpose_x = var_2391_transpose_x_1, transpose_y = var_2391_transpose_y_1, x = attn_weights_119_cast_fp16, y = var_2375_cast_fp16_0)[name = string("op_2391_cast_fp16")]; bool attn_weights_121_transpose_x_0 = const()[name = string("attn_weights_121_transpose_x_0"), val = bool(false)]; bool attn_weights_121_transpose_y_0 = const()[name = string("attn_weights_121_transpose_y_0"), val = bool(false)]; tensor attn_weights_121_cast_fp16 = matmul(transpose_x = attn_weights_121_transpose_x_0, transpose_y = attn_weights_121_transpose_y_0, x = var_2365_cast_fp16_1, y = var_2378_cast_fp16_1)[name = string("attn_weights_121_cast_fp16")]; fp16 _inversed_attn_weights_123_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_123_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_123_cast_fp16 = mul(x = attn_weights_121_cast_fp16, y = _inversed_attn_weights_123_y_0_to_fp16)[name = string("_inversed_attn_weights_123_cast_fp16")]; tensor attn_weights_125_cast_fp16 = add(x = _inversed_attn_weights_123_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(2)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_2397, x = attn_weights_125_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_43_transpose_x_1 = const()[name = string("attn_output_43_transpose_x_1"), val = bool(true)]; bool attn_output_43_transpose_y_1 = const()[name = string("attn_output_43_transpose_y_1"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_1, transpose_y = attn_output_43_transpose_y_1, x = attn_weights_127_cast_fp16, y = var_2375_cast_fp16_1)[name = string("attn_output_43_cast_fp16")]; int32 var_2405 = const()[name = string("op_2405"), val = int32(1)]; bool attn_output_45_interleave_0 = const()[name = string("attn_output_45_interleave_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = concat(axis = var_2405, interleave = attn_output_45_interleave_0, values = (var_2391_cast_fp16, attn_output_43_cast_fp16))[name = string("attn_output_45_cast_fp16")]; tensor var_2409_perm_0 = const()[name = string("op_2409_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2414 = const()[name = string("op_2414"), val = tensor([1, 1024, 1, 32])]; tensor var_2409_cast_fp16 = transpose(perm = var_2409_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor x_131_cast_fp16 = reshape(shape = var_2414, x = var_2409_cast_fp16)[name = string("x_131_cast_fp16")]; string hidden_states_45_pad_type_0 = const()[name = string("hidden_states_45_pad_type_0"), val = string("valid")]; tensor hidden_states_45_strides_0 = const()[name = string("hidden_states_45_strides_0"), val = tensor([1, 1])]; tensor hidden_states_45_pad_0 = const()[name = string("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_45_dilations_0 = const()[name = string("hidden_states_45_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_45_groups_0 = const()[name = string("hidden_states_45_groups_0"), val = int32(1)]; tensor var_2421_to_fp16 = const()[name = string("op_2421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279296)))]; tensor hidden_states_45_cast_fp16 = conv(dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = var_2421_to_fp16, x = x_131_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor x_133_cast_fp16 = add(x = x_125_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("x_133_cast_fp16")]; int32 var_2433 = const()[name = string("op_2433"), val = int32(1)]; fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2436_cast_fp16 = mul(x = x_133_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; bool x_135_interleave_0 = const()[name = string("x_135_interleave_0"), val = bool(false)]; tensor x_135_cast_fp16 = concat(axis = var_2433, interleave = x_135_interleave_0, values = (x_133_cast_fp16, var_2436_cast_fp16))[name = string("x_135_cast_fp16")]; tensor out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor([1])]; fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2446_to_fp16, x = x_135_cast_fp16)[name = string("out_91_cast_fp16")]; tensor layer_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376512)))]; tensor out_93_cast_fp16 = mul(x = out_91_cast_fp16, y = layer_layers_7_post_attention_layernorm_weight_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_2452_split_sizes_0 = const()[name = string("op_2452_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2452_axis_0 = const()[name = string("op_2452_axis_0"), val = int32(1)]; tensor var_2452_cast_fp16_0, tensor var_2452_cast_fp16_1 = split(axis = var_2452_axis_0, split_sizes = var_2452_split_sizes_0, x = out_93_cast_fp16)[name = string("op_2452_cast_fp16")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222380672)))]; tensor input_15_cast_fp16 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = var_2457_to_fp16, x = var_2452_cast_fp16_0)[name = string("input_15_cast_fp16")]; tensor var_2468_cast_fp16 = silu(x = input_15_cast_fp16)[name = string("op_2468_cast_fp16")]; string var_2473_pad_type_0 = const()[name = string("op_2473_pad_type_0"), val = string("valid")]; tensor var_2473_strides_0 = const()[name = string("op_2473_strides_0"), val = tensor([1, 1])]; tensor var_2473_pad_0 = const()[name = string("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2473_dilations_0 = const()[name = string("op_2473_dilations_0"), val = tensor([1, 1])]; int32 var_2473_groups_0 = const()[name = string("op_2473_groups_0"), val = int32(1)]; tensor var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230769344)))]; tensor var_2473_cast_fp16 = conv(dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = var_2456_to_fp16, x = var_2452_cast_fp16_0)[name = string("op_2473_cast_fp16")]; tensor x_141_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2473_cast_fp16)[name = string("x_141_cast_fp16")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239158016)))]; tensor hidden_states_47_cast_fp16 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = var_2455_to_fp16, x = x_141_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor x_143_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("x_143_cast_fp16")]; int32 var_2486 = const()[name = string("op_2486"), val = int32(1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2489_cast_fp16 = mul(x = x_143_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; bool x_145_interleave_0 = const()[name = string("x_145_interleave_0"), val = bool(false)]; tensor x_145_cast_fp16 = concat(axis = var_2486, interleave = x_145_interleave_0, values = (x_143_cast_fp16, var_2489_cast_fp16))[name = string("x_145_cast_fp16")]; tensor out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor([1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_2499_to_fp16, x = x_145_cast_fp16)[name = string("out_97_cast_fp16")]; tensor layer_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247546688)))]; tensor out_99_cast_fp16 = mul(x = out_97_cast_fp16, y = layer_layers_8_input_layernorm_weight_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_2505_split_sizes_0 = const()[name = string("op_2505_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2505_axis_0 = const()[name = string("op_2505_axis_0"), val = int32(1)]; tensor var_2505_cast_fp16_0, tensor var_2505_cast_fp16_1 = split(axis = var_2505_axis_0, split_sizes = var_2505_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2505_cast_fp16")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247550848)))]; tensor query_states_33_cast_fp16 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = var_2527_to_fp16, x = var_2505_cast_fp16_0)[name = string("query_states_33_cast_fp16")]; string key_states_33_pad_type_0 = const()[name = string("key_states_33_pad_type_0"), val = string("valid")]; tensor key_states_33_strides_0 = const()[name = string("key_states_33_strides_0"), val = tensor([1, 1])]; tensor key_states_33_pad_0 = const()[name = string("key_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_33_dilations_0 = const()[name = string("key_states_33_dilations_0"), val = tensor([1, 1])]; int32 key_states_33_groups_0 = const()[name = string("key_states_33_groups_0"), val = int32(1)]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249648064)))]; tensor key_states_33_cast_fp16 = conv(dilations = key_states_33_dilations_0, groups = key_states_33_groups_0, pad = key_states_33_pad_0, pad_type = key_states_33_pad_type_0, strides = key_states_33_strides_0, weight = var_2538_to_fp16, x = var_2505_cast_fp16_0)[name = string("key_states_33_cast_fp16")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249910272)))]; tensor value_states_33_cast_fp16 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = var_2549_to_fp16, x = var_2505_cast_fp16_0)[name = string("value_states_33_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 16, 64, 32])]; tensor embed_33_cast_fp16 = reshape(shape = var_2557, x = query_states_33_cast_fp16)[name = string("embed_33_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 2, 64, 32])]; tensor var_2562_cast_fp16 = reshape(shape = var_2561, x = key_states_33_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor embed_35_perm_0 = const()[name = string("embed_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([1, 2, 64, 32])]; tensor var_2569_cast_fp16 = reshape(shape = var_2568, x = value_states_33_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor value_states_35_perm_0 = const()[name = string("value_states_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2573_cast_fp16 = mul(x = embed_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor var_2574_split_sizes_0 = const()[name = string("op_2574_split_sizes_0"), val = tensor([32, 32])]; int32 var_2574_axis_0 = const()[name = string("op_2574_axis_0"), val = int32(-2)]; tensor var_2574_cast_fp16_0, tensor var_2574_cast_fp16_1 = split(axis = var_2574_axis_0, split_sizes = var_2574_split_sizes_0, x = embed_33_cast_fp16)[name = string("op_2574_cast_fp16")]; fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2576_cast_fp16 = mul(x = var_2574_cast_fp16_1, y = const_87_promoted_to_fp16)[name = string("op_2576_cast_fp16")]; int32 var_2578 = const()[name = string("op_2578"), val = int32(-2)]; bool var_2579_interleave_0 = const()[name = string("op_2579_interleave_0"), val = bool(false)]; tensor var_2579_cast_fp16 = concat(axis = var_2578, interleave = var_2579_interleave_0, values = (var_2576_cast_fp16, var_2574_cast_fp16_0))[name = string("op_2579_cast_fp16")]; tensor var_2580_cast_fp16 = mul(x = var_2579_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2580_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor embed_35_cast_fp16 = transpose(perm = embed_35_perm_0, x = var_2562_cast_fp16)[name = string("transpose_47")]; tensor var_2583_cast_fp16 = mul(x = embed_35_cast_fp16, y = cos_cast_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2584_split_sizes_0 = const()[name = string("op_2584_split_sizes_0"), val = tensor([32, 32])]; int32 var_2584_axis_0 = const()[name = string("op_2584_axis_0"), val = int32(-1)]; tensor var_2584_cast_fp16_0, tensor var_2584_cast_fp16_1 = split(axis = var_2584_axis_0, split_sizes = var_2584_split_sizes_0, x = embed_35_cast_fp16)[name = string("op_2584_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2586_cast_fp16 = mul(x = var_2584_cast_fp16_1, y = const_88_promoted_to_fp16)[name = string("op_2586_cast_fp16")]; int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; tensor var_2589_cast_fp16 = concat(axis = var_2588, interleave = var_2589_interleave_0, values = (var_2586_cast_fp16, var_2584_cast_fp16_0))[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_cast_fp16)[name = string("op_2590_cast_fp16")]; tensor key_states_35_cast_fp16 = add(x = var_2583_cast_fp16, y = var_2590_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([8])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([9])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_81, expand_dims_82, position_id, concat_67_values3_0))[name = string("concat_67")]; tensor concat_68_values1_0 = const()[name = string("concat_68_values1_0"), val = tensor([0])]; tensor concat_68_values3_0 = const()[name = string("concat_68_values3_0"), val = tensor([0])]; int32 concat_68_axis_0 = const()[name = string("concat_68_axis_0"), val = int32(0)]; bool concat_68_interleave_0 = const()[name = string("concat_68_interleave_0"), val = bool(false)]; tensor concat_68 = concat(axis = concat_68_axis_0, interleave = concat_68_interleave_0, values = (expand_dims_84, concat_68_values1_0, var_426, concat_68_values3_0))[name = string("concat_68")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = key_states_35_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_326_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_326")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35_cast_fp16 = transpose(perm = value_states_35_perm_0, x = var_2569_cast_fp16)[name = string("transpose_46")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = value_states_35_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_327_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_327")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_64)[name = string("op_2633_cast_fp16")]; tensor tile_16 = const()[name = string("tile_16"), val = tensor([1, 1])]; int32 var_2636_axis_0 = const()[name = string("op_2636_axis_0"), val = int32(1)]; tensor var_2636_cast_fp16_0, tensor var_2636_cast_fp16_1 = split(axis = var_2636_axis_0, split_sizes = tile_16, x = var_2633_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor var_2643_begin_0 = const()[name = string("op_2643_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2643_end_0 = const()[name = string("op_2643_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2643_end_mask_0 = const()[name = string("op_2643_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = coreml_update_state_65)[name = string("op_2643_cast_fp16")]; tensor tile_17 = const()[name = string("tile_17"), val = tensor([1, 1])]; int32 var_2646_axis_0 = const()[name = string("op_2646_axis_0"), val = int32(1)]; tensor var_2646_cast_fp16_0, tensor var_2646_cast_fp16_1 = split(axis = var_2646_axis_0, split_sizes = tile_17, x = var_2643_cast_fp16)[name = string("op_2646_cast_fp16")]; tensor var_2649_split_sizes_0 = const()[name = string("op_2649_split_sizes_0"), val = tensor([8, 8])]; int32 var_2649_axis_0 = const()[name = string("op_2649_axis_0"), val = int32(1)]; tensor var_2649_cast_fp16_0, tensor var_2649_cast_fp16_1 = split(axis = var_2649_axis_0, split_sizes = var_2649_split_sizes_0, x = query_states_35_cast_fp16)[name = string("op_2649_cast_fp16")]; bool attn_weights_129_transpose_x_0 = const()[name = string("attn_weights_129_transpose_x_0"), val = bool(false)]; bool attn_weights_129_transpose_y_0 = const()[name = string("attn_weights_129_transpose_y_0"), val = bool(false)]; tensor attn_weights_129_cast_fp16 = matmul(transpose_x = attn_weights_129_transpose_x_0, transpose_y = attn_weights_129_transpose_y_0, x = var_2636_cast_fp16_0, y = var_2649_cast_fp16_0)[name = string("attn_weights_129_cast_fp16")]; fp16 _inversed_attn_weights_131_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_131_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_131_cast_fp16 = mul(x = attn_weights_129_cast_fp16, y = _inversed_attn_weights_131_y_0_to_fp16)[name = string("_inversed_attn_weights_131_cast_fp16")]; tensor attn_weights_133_cast_fp16 = add(x = _inversed_attn_weights_131_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_133_cast_fp16")]; int32 var_2656 = const()[name = string("op_2656"), val = int32(2)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_2656, x = attn_weights_133_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool var_2662_transpose_x_1 = const()[name = string("op_2662_transpose_x_1"), val = bool(true)]; bool var_2662_transpose_y_1 = const()[name = string("op_2662_transpose_y_1"), val = bool(false)]; tensor var_2662_cast_fp16 = matmul(transpose_x = var_2662_transpose_x_1, transpose_y = var_2662_transpose_y_1, x = attn_weights_135_cast_fp16, y = var_2646_cast_fp16_0)[name = string("op_2662_cast_fp16")]; bool attn_weights_137_transpose_x_0 = const()[name = string("attn_weights_137_transpose_x_0"), val = bool(false)]; bool attn_weights_137_transpose_y_0 = const()[name = string("attn_weights_137_transpose_y_0"), val = bool(false)]; tensor attn_weights_137_cast_fp16 = matmul(transpose_x = attn_weights_137_transpose_x_0, transpose_y = attn_weights_137_transpose_y_0, x = var_2636_cast_fp16_1, y = var_2649_cast_fp16_1)[name = string("attn_weights_137_cast_fp16")]; fp16 _inversed_attn_weights_139_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_139_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_139_cast_fp16 = mul(x = attn_weights_137_cast_fp16, y = _inversed_attn_weights_139_y_0_to_fp16)[name = string("_inversed_attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = _inversed_attn_weights_139_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; int32 var_2668 = const()[name = string("op_2668"), val = int32(2)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_2668, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(true)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_143_cast_fp16, y = var_2646_cast_fp16_1)[name = string("attn_output_49_cast_fp16")]; int32 var_2676 = const()[name = string("op_2676"), val = int32(1)]; bool attn_output_51_interleave_0 = const()[name = string("attn_output_51_interleave_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = concat(axis = var_2676, interleave = attn_output_51_interleave_0, values = (var_2662_cast_fp16, attn_output_49_cast_fp16))[name = string("attn_output_51_cast_fp16")]; tensor var_2680_perm_0 = const()[name = string("op_2680_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 1024, 1, 32])]; tensor var_2680_cast_fp16 = transpose(perm = var_2680_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_45")]; tensor x_149_cast_fp16 = reshape(shape = var_2685, x = var_2680_cast_fp16)[name = string("x_149_cast_fp16")]; string hidden_states_51_pad_type_0 = const()[name = string("hidden_states_51_pad_type_0"), val = string("valid")]; tensor hidden_states_51_strides_0 = const()[name = string("hidden_states_51_strides_0"), val = tensor([1, 1])]; tensor hidden_states_51_pad_0 = const()[name = string("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_51_dilations_0 = const()[name = string("hidden_states_51_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_51_groups_0 = const()[name = string("hidden_states_51_groups_0"), val = int32(1)]; tensor var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250172480)))]; tensor hidden_states_51_cast_fp16 = conv(dilations = hidden_states_51_dilations_0, groups = hidden_states_51_groups_0, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = hidden_states_51_strides_0, weight = var_2692_to_fp16, x = x_149_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor x_151_cast_fp16 = add(x = x_143_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("x_151_cast_fp16")]; int32 var_2704 = const()[name = string("op_2704"), val = int32(1)]; fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2707_cast_fp16 = mul(x = x_151_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_2707_cast_fp16")]; bool x_153_interleave_0 = const()[name = string("x_153_interleave_0"), val = bool(false)]; tensor x_153_cast_fp16 = concat(axis = var_2704, interleave = x_153_interleave_0, values = (x_151_cast_fp16, var_2707_cast_fp16))[name = string("x_153_cast_fp16")]; tensor out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor([1])]; fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_2717_to_fp16, x = x_153_cast_fp16)[name = string("out_103_cast_fp16")]; tensor layer_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252269696)))]; tensor out_105_cast_fp16 = mul(x = out_103_cast_fp16, y = layer_layers_8_post_attention_layernorm_weight_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2723_split_sizes_0 = const()[name = string("op_2723_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2723_axis_0 = const()[name = string("op_2723_axis_0"), val = int32(1)]; tensor var_2723_cast_fp16_0, tensor var_2723_cast_fp16_1 = split(axis = var_2723_axis_0, split_sizes = var_2723_split_sizes_0, x = out_105_cast_fp16)[name = string("op_2723_cast_fp16")]; string input_17_pad_type_0 = const()[name = string("input_17_pad_type_0"), val = string("valid")]; tensor input_17_strides_0 = const()[name = string("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = string("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = string("input_17_dilations_0"), val = tensor([1, 1])]; int32 input_17_groups_0 = const()[name = string("input_17_groups_0"), val = int32(1)]; tensor var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252273856)))]; tensor input_17_cast_fp16 = conv(dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = var_2728_to_fp16, x = var_2723_cast_fp16_0)[name = string("input_17_cast_fp16")]; tensor var_2739_cast_fp16 = silu(x = input_17_cast_fp16)[name = string("op_2739_cast_fp16")]; string var_2744_pad_type_0 = const()[name = string("op_2744_pad_type_0"), val = string("valid")]; tensor var_2744_strides_0 = const()[name = string("op_2744_strides_0"), val = tensor([1, 1])]; tensor var_2744_pad_0 = const()[name = string("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2744_dilations_0 = const()[name = string("op_2744_dilations_0"), val = tensor([1, 1])]; int32 var_2744_groups_0 = const()[name = string("op_2744_groups_0"), val = int32(1)]; tensor var_2727_to_fp16 = const()[name = string("op_2727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260662528)))]; tensor var_2744_cast_fp16 = conv(dilations = var_2744_dilations_0, groups = var_2744_groups_0, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2744_strides_0, weight = var_2727_to_fp16, x = var_2723_cast_fp16_0)[name = string("op_2744_cast_fp16")]; tensor x_159_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2744_cast_fp16)[name = string("x_159_cast_fp16")]; string hidden_states_53_pad_type_0 = const()[name = string("hidden_states_53_pad_type_0"), val = string("valid")]; tensor hidden_states_53_strides_0 = const()[name = string("hidden_states_53_strides_0"), val = tensor([1, 1])]; tensor hidden_states_53_pad_0 = const()[name = string("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_53_dilations_0 = const()[name = string("hidden_states_53_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_53_groups_0 = const()[name = string("hidden_states_53_groups_0"), val = int32(1)]; tensor var_2726_to_fp16 = const()[name = string("op_2726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269051200)))]; tensor hidden_states_53_cast_fp16 = conv(dilations = hidden_states_53_dilations_0, groups = hidden_states_53_groups_0, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = hidden_states_53_strides_0, weight = var_2726_to_fp16, x = x_159_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_151_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_2757 = const()[name = string("op_2757"), val = int32(1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2760_cast_fp16 = mul(x = x_161_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_2760_cast_fp16")]; bool x_163_interleave_0 = const()[name = string("x_163_interleave_0"), val = bool(false)]; tensor x_163_cast_fp16 = concat(axis = var_2757, interleave = x_163_interleave_0, values = (x_161_cast_fp16, var_2760_cast_fp16))[name = string("x_163_cast_fp16")]; tensor out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor([1])]; fp16 var_2770_to_fp16 = const()[name = string("op_2770_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_2770_to_fp16, x = x_163_cast_fp16)[name = string("out_109_cast_fp16")]; tensor layer_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277439872)))]; tensor out_111_cast_fp16 = mul(x = out_109_cast_fp16, y = layer_layers_9_input_layernorm_weight_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_2776_split_sizes_0 = const()[name = string("op_2776_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2776_axis_0 = const()[name = string("op_2776_axis_0"), val = int32(1)]; tensor var_2776_cast_fp16_0, tensor var_2776_cast_fp16_1 = split(axis = var_2776_axis_0, split_sizes = var_2776_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2776_cast_fp16")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277444032)))]; tensor query_states_37_cast_fp16 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = var_2798_to_fp16, x = var_2776_cast_fp16_0)[name = string("query_states_37_cast_fp16")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279541248)))]; tensor key_states_37_cast_fp16 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = var_2809_to_fp16, x = var_2776_cast_fp16_0)[name = string("key_states_37_cast_fp16")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor var_2820_to_fp16 = const()[name = string("op_2820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279803456)))]; tensor value_states_37_cast_fp16 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = var_2820_to_fp16, x = var_2776_cast_fp16_0)[name = string("value_states_37_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 16, 64, 32])]; tensor embed_37_cast_fp16 = reshape(shape = var_2828, x = query_states_37_cast_fp16)[name = string("embed_37_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 64, 32])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = key_states_37_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor embed_39_perm_0 = const()[name = string("embed_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([1, 2, 64, 32])]; tensor var_2840_cast_fp16 = reshape(shape = var_2839, x = value_states_37_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor value_states_39_perm_0 = const()[name = string("value_states_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2844_cast_fp16 = mul(x = embed_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor var_2845_split_sizes_0 = const()[name = string("op_2845_split_sizes_0"), val = tensor([32, 32])]; int32 var_2845_axis_0 = const()[name = string("op_2845_axis_0"), val = int32(-2)]; tensor var_2845_cast_fp16_0, tensor var_2845_cast_fp16_1 = split(axis = var_2845_axis_0, split_sizes = var_2845_split_sizes_0, x = embed_37_cast_fp16)[name = string("op_2845_cast_fp16")]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = var_2845_cast_fp16_1, y = const_97_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; int32 var_2849 = const()[name = string("op_2849"), val = int32(-2)]; bool var_2850_interleave_0 = const()[name = string("op_2850_interleave_0"), val = bool(false)]; tensor var_2850_cast_fp16 = concat(axis = var_2849, interleave = var_2850_interleave_0, values = (var_2847_cast_fp16, var_2845_cast_fp16_0))[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = mul(x = var_2850_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2851_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor embed_39_cast_fp16 = transpose(perm = embed_39_perm_0, x = var_2833_cast_fp16)[name = string("transpose_44")]; tensor var_2854_cast_fp16 = mul(x = embed_39_cast_fp16, y = cos_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2855_split_sizes_0 = const()[name = string("op_2855_split_sizes_0"), val = tensor([32, 32])]; int32 var_2855_axis_0 = const()[name = string("op_2855_axis_0"), val = int32(-1)]; tensor var_2855_cast_fp16_0, tensor var_2855_cast_fp16_1 = split(axis = var_2855_axis_0, split_sizes = var_2855_split_sizes_0, x = embed_39_cast_fp16)[name = string("op_2855_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2857_cast_fp16 = mul(x = var_2855_cast_fp16_1, y = const_98_promoted_to_fp16)[name = string("op_2857_cast_fp16")]; int32 var_2859 = const()[name = string("op_2859"), val = int32(-1)]; bool var_2860_interleave_0 = const()[name = string("op_2860_interleave_0"), val = bool(false)]; tensor var_2860_cast_fp16 = concat(axis = var_2859, interleave = var_2860_interleave_0, values = (var_2857_cast_fp16, var_2855_cast_fp16_0))[name = string("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = mul(x = var_2860_cast_fp16, y = sin_cast_fp16)[name = string("op_2861_cast_fp16")]; tensor key_states_39_cast_fp16 = add(x = var_2854_cast_fp16, y = var_2861_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([9])]; tensor expand_dims_92 = const()[name = string("expand_dims_92"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([10])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_91, expand_dims_92, position_id, concat_75_values3_0))[name = string("concat_75")]; tensor concat_76_values1_0 = const()[name = string("concat_76_values1_0"), val = tensor([0])]; tensor concat_76_values3_0 = const()[name = string("concat_76_values3_0"), val = tensor([0])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_94, concat_76_values1_0, var_426, concat_76_values3_0))[name = string("concat_76")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = key_states_39_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_328_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_328")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39_cast_fp16 = transpose(perm = value_states_39_perm_0, x = var_2840_cast_fp16)[name = string("transpose_43")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = value_states_39_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_329_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_329")]; tensor var_2904_begin_0 = const()[name = string("op_2904_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = string("op_2904_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2904_end_mask_0 = const()[name = string("op_2904_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = coreml_update_state_66)[name = string("op_2904_cast_fp16")]; tensor tile_18 = const()[name = string("tile_18"), val = tensor([1, 1])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = tile_18, x = var_2904_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2914_begin_0 = const()[name = string("op_2914_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = string("op_2914_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2914_end_mask_0 = const()[name = string("op_2914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = coreml_update_state_67)[name = string("op_2914_cast_fp16")]; tensor tile_19 = const()[name = string("tile_19"), val = tensor([1, 1])]; int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(1)]; tensor var_2917_cast_fp16_0, tensor var_2917_cast_fp16_1 = split(axis = var_2917_axis_0, split_sizes = tile_19, x = var_2914_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor var_2920_split_sizes_0 = const()[name = string("op_2920_split_sizes_0"), val = tensor([8, 8])]; int32 var_2920_axis_0 = const()[name = string("op_2920_axis_0"), val = int32(1)]; tensor var_2920_cast_fp16_0, tensor var_2920_cast_fp16_1 = split(axis = var_2920_axis_0, split_sizes = var_2920_split_sizes_0, x = query_states_39_cast_fp16)[name = string("op_2920_cast_fp16")]; bool attn_weights_145_transpose_x_0 = const()[name = string("attn_weights_145_transpose_x_0"), val = bool(false)]; bool attn_weights_145_transpose_y_0 = const()[name = string("attn_weights_145_transpose_y_0"), val = bool(false)]; tensor attn_weights_145_cast_fp16 = matmul(transpose_x = attn_weights_145_transpose_x_0, transpose_y = attn_weights_145_transpose_y_0, x = var_2907_cast_fp16_0, y = var_2920_cast_fp16_0)[name = string("attn_weights_145_cast_fp16")]; fp16 _inversed_attn_weights_147_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_147_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_147_cast_fp16 = mul(x = attn_weights_145_cast_fp16, y = _inversed_attn_weights_147_y_0_to_fp16)[name = string("_inversed_attn_weights_147_cast_fp16")]; tensor attn_weights_149_cast_fp16 = add(x = _inversed_attn_weights_147_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; int32 var_2927 = const()[name = string("op_2927"), val = int32(2)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_2927, x = attn_weights_149_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool var_2933_transpose_x_1 = const()[name = string("op_2933_transpose_x_1"), val = bool(true)]; bool var_2933_transpose_y_1 = const()[name = string("op_2933_transpose_y_1"), val = bool(false)]; tensor var_2933_cast_fp16 = matmul(transpose_x = var_2933_transpose_x_1, transpose_y = var_2933_transpose_y_1, x = attn_weights_151_cast_fp16, y = var_2917_cast_fp16_0)[name = string("op_2933_cast_fp16")]; bool attn_weights_153_transpose_x_0 = const()[name = string("attn_weights_153_transpose_x_0"), val = bool(false)]; bool attn_weights_153_transpose_y_0 = const()[name = string("attn_weights_153_transpose_y_0"), val = bool(false)]; tensor attn_weights_153_cast_fp16 = matmul(transpose_x = attn_weights_153_transpose_x_0, transpose_y = attn_weights_153_transpose_y_0, x = var_2907_cast_fp16_1, y = var_2920_cast_fp16_1)[name = string("attn_weights_153_cast_fp16")]; fp16 _inversed_attn_weights_155_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_155_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_155_cast_fp16 = mul(x = attn_weights_153_cast_fp16, y = _inversed_attn_weights_155_y_0_to_fp16)[name = string("_inversed_attn_weights_155_cast_fp16")]; tensor attn_weights_157_cast_fp16 = add(x = _inversed_attn_weights_155_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_157_cast_fp16")]; int32 var_2939 = const()[name = string("op_2939"), val = int32(2)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_2939, x = attn_weights_157_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_55_transpose_x_1 = const()[name = string("attn_output_55_transpose_x_1"), val = bool(true)]; bool attn_output_55_transpose_y_1 = const()[name = string("attn_output_55_transpose_y_1"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_1, transpose_y = attn_output_55_transpose_y_1, x = attn_weights_159_cast_fp16, y = var_2917_cast_fp16_1)[name = string("attn_output_55_cast_fp16")]; int32 var_2947 = const()[name = string("op_2947"), val = int32(1)]; bool attn_output_57_interleave_0 = const()[name = string("attn_output_57_interleave_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = concat(axis = var_2947, interleave = attn_output_57_interleave_0, values = (var_2933_cast_fp16, attn_output_55_cast_fp16))[name = string("attn_output_57_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1024, 1, 32])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; tensor x_167_cast_fp16 = reshape(shape = var_2956, x = var_2951_cast_fp16)[name = string("x_167_cast_fp16")]; string hidden_states_57_pad_type_0 = const()[name = string("hidden_states_57_pad_type_0"), val = string("valid")]; tensor hidden_states_57_strides_0 = const()[name = string("hidden_states_57_strides_0"), val = tensor([1, 1])]; tensor hidden_states_57_pad_0 = const()[name = string("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_57_dilations_0 = const()[name = string("hidden_states_57_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_57_groups_0 = const()[name = string("hidden_states_57_groups_0"), val = int32(1)]; tensor var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280065664)))]; tensor hidden_states_57_cast_fp16 = conv(dilations = hidden_states_57_dilations_0, groups = hidden_states_57_groups_0, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = hidden_states_57_strides_0, weight = var_2963_to_fp16, x = x_167_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor x_169_cast_fp16 = add(x = x_161_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("x_169_cast_fp16")]; int32 var_2975 = const()[name = string("op_2975"), val = int32(1)]; fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2978_cast_fp16 = mul(x = x_169_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; bool x_171_interleave_0 = const()[name = string("x_171_interleave_0"), val = bool(false)]; tensor x_171_cast_fp16 = concat(axis = var_2975, interleave = x_171_interleave_0, values = (x_169_cast_fp16, var_2978_cast_fp16))[name = string("x_171_cast_fp16")]; tensor out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor([1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_2988_to_fp16, x = x_171_cast_fp16)[name = string("out_115_cast_fp16")]; tensor layer_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282162880)))]; tensor out_117_cast_fp16 = mul(x = out_115_cast_fp16, y = layer_layers_9_post_attention_layernorm_weight_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2994_split_sizes_0 = const()[name = string("op_2994_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2994_axis_0 = const()[name = string("op_2994_axis_0"), val = int32(1)]; tensor var_2994_cast_fp16_0, tensor var_2994_cast_fp16_1 = split(axis = var_2994_axis_0, split_sizes = var_2994_split_sizes_0, x = out_117_cast_fp16)[name = string("op_2994_cast_fp16")]; string input_19_pad_type_0 = const()[name = string("input_19_pad_type_0"), val = string("valid")]; tensor input_19_strides_0 = const()[name = string("input_19_strides_0"), val = tensor([1, 1])]; tensor input_19_pad_0 = const()[name = string("input_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_19_dilations_0 = const()[name = string("input_19_dilations_0"), val = tensor([1, 1])]; int32 input_19_groups_0 = const()[name = string("input_19_groups_0"), val = int32(1)]; tensor var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282167040)))]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = var_2999_to_fp16, x = var_2994_cast_fp16_0)[name = string("input_19_cast_fp16")]; tensor var_3010_cast_fp16 = silu(x = input_19_cast_fp16)[name = string("op_3010_cast_fp16")]; string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1, 1])]; tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1, 1])]; int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; tensor var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290555712)))]; tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = var_2998_to_fp16, x = var_2994_cast_fp16_0)[name = string("op_3015_cast_fp16")]; tensor x_177_cast_fp16 = mul(x = var_3010_cast_fp16, y = var_3015_cast_fp16)[name = string("x_177_cast_fp16")]; string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; tensor var_2997_to_fp16 = const()[name = string("op_2997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944384)))]; tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = var_2997_to_fp16, x = x_177_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor x_179_cast_fp16 = add(x = x_169_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("x_179_cast_fp16")]; int32 var_3028 = const()[name = string("op_3028"), val = int32(1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3031_cast_fp16 = mul(x = x_179_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3031_cast_fp16")]; bool x_181_interleave_0 = const()[name = string("x_181_interleave_0"), val = bool(false)]; tensor x_181_cast_fp16 = concat(axis = var_3028, interleave = x_181_interleave_0, values = (x_179_cast_fp16, var_3031_cast_fp16))[name = string("x_181_cast_fp16")]; tensor out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor([1])]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_3041_to_fp16, x = x_181_cast_fp16)[name = string("out_121_cast_fp16")]; tensor layer_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307333056)))]; tensor out_123_cast_fp16 = mul(x = out_121_cast_fp16, y = layer_layers_10_input_layernorm_weight_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_3047_split_sizes_0 = const()[name = string("op_3047_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3047_axis_0 = const()[name = string("op_3047_axis_0"), val = int32(1)]; tensor var_3047_cast_fp16_0, tensor var_3047_cast_fp16_1 = split(axis = var_3047_axis_0, split_sizes = var_3047_split_sizes_0, x = out_123_cast_fp16)[name = string("op_3047_cast_fp16")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307337216)))]; tensor query_states_41_cast_fp16 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = var_3069_to_fp16, x = var_3047_cast_fp16_0)[name = string("query_states_41_cast_fp16")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309434432)))]; tensor key_states_41_cast_fp16 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = var_3080_to_fp16, x = var_3047_cast_fp16_0)[name = string("key_states_41_cast_fp16")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309696640)))]; tensor value_states_41_cast_fp16 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = var_3091_to_fp16, x = var_3047_cast_fp16_0)[name = string("value_states_41_cast_fp16")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([1, 16, 64, 32])]; tensor embed_41_cast_fp16 = reshape(shape = var_3099, x = query_states_41_cast_fp16)[name = string("embed_41_cast_fp16")]; tensor var_3103 = const()[name = string("op_3103"), val = tensor([1, 2, 64, 32])]; tensor var_3104_cast_fp16 = reshape(shape = var_3103, x = key_states_41_cast_fp16)[name = string("op_3104_cast_fp16")]; tensor embed_43_perm_0 = const()[name = string("embed_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 2, 64, 32])]; tensor var_3111_cast_fp16 = reshape(shape = var_3110, x = value_states_41_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor value_states_43_perm_0 = const()[name = string("value_states_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3115_cast_fp16 = mul(x = embed_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3115_cast_fp16")]; tensor var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor([32, 32])]; int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-2)]; tensor var_3116_cast_fp16_0, tensor var_3116_cast_fp16_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = embed_41_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3118_cast_fp16 = mul(x = var_3116_cast_fp16_1, y = const_107_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; int32 var_3120 = const()[name = string("op_3120"), val = int32(-2)]; bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)]; tensor var_3121_cast_fp16 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118_cast_fp16, var_3116_cast_fp16_0))[name = string("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = mul(x = var_3121_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3122_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor embed_43_cast_fp16 = transpose(perm = embed_43_perm_0, x = var_3104_cast_fp16)[name = string("transpose_41")]; tensor var_3125_cast_fp16 = mul(x = embed_43_cast_fp16, y = cos_cast_fp16)[name = string("op_3125_cast_fp16")]; tensor var_3126_split_sizes_0 = const()[name = string("op_3126_split_sizes_0"), val = tensor([32, 32])]; int32 var_3126_axis_0 = const()[name = string("op_3126_axis_0"), val = int32(-1)]; tensor var_3126_cast_fp16_0, tensor var_3126_cast_fp16_1 = split(axis = var_3126_axis_0, split_sizes = var_3126_split_sizes_0, x = embed_43_cast_fp16)[name = string("op_3126_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3128_cast_fp16 = mul(x = var_3126_cast_fp16_1, y = const_108_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; bool var_3131_interleave_0 = const()[name = string("op_3131_interleave_0"), val = bool(false)]; tensor var_3131_cast_fp16 = concat(axis = var_3130, interleave = var_3131_interleave_0, values = (var_3128_cast_fp16, var_3126_cast_fp16_0))[name = string("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = mul(x = var_3131_cast_fp16, y = sin_cast_fp16)[name = string("op_3132_cast_fp16")]; tensor key_states_43_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3132_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor expand_dims_101 = const()[name = string("expand_dims_101"), val = tensor([10])]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([0])]; tensor expand_dims_104 = const()[name = string("expand_dims_104"), val = tensor([11])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_101, expand_dims_102, position_id, concat_83_values3_0))[name = string("concat_83")]; tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (expand_dims_104, concat_84_values1_0, var_426, concat_84_values3_0))[name = string("concat_84")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_330_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_330")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43_cast_fp16 = transpose(perm = value_states_43_perm_0, x = var_3111_cast_fp16)[name = string("transpose_40")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = value_states_43_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_331_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_331")]; tensor var_3175_begin_0 = const()[name = string("op_3175_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3175_end_0 = const()[name = string("op_3175_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3175_end_mask_0 = const()[name = string("op_3175_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3175_cast_fp16 = slice_by_index(begin = var_3175_begin_0, end = var_3175_end_0, end_mask = var_3175_end_mask_0, x = coreml_update_state_68)[name = string("op_3175_cast_fp16")]; tensor tile_20 = const()[name = string("tile_20"), val = tensor([1, 1])]; int32 var_3178_axis_0 = const()[name = string("op_3178_axis_0"), val = int32(1)]; tensor var_3178_cast_fp16_0, tensor var_3178_cast_fp16_1 = split(axis = var_3178_axis_0, split_sizes = tile_20, x = var_3175_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor var_3185_begin_0 = const()[name = string("op_3185_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3185_end_0 = const()[name = string("op_3185_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3185_end_mask_0 = const()[name = string("op_3185_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = coreml_update_state_69)[name = string("op_3185_cast_fp16")]; tensor tile_21 = const()[name = string("tile_21"), val = tensor([1, 1])]; int32 var_3188_axis_0 = const()[name = string("op_3188_axis_0"), val = int32(1)]; tensor var_3188_cast_fp16_0, tensor var_3188_cast_fp16_1 = split(axis = var_3188_axis_0, split_sizes = tile_21, x = var_3185_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor var_3191_split_sizes_0 = const()[name = string("op_3191_split_sizes_0"), val = tensor([8, 8])]; int32 var_3191_axis_0 = const()[name = string("op_3191_axis_0"), val = int32(1)]; tensor var_3191_cast_fp16_0, tensor var_3191_cast_fp16_1 = split(axis = var_3191_axis_0, split_sizes = var_3191_split_sizes_0, x = query_states_43_cast_fp16)[name = string("op_3191_cast_fp16")]; bool attn_weights_161_transpose_x_0 = const()[name = string("attn_weights_161_transpose_x_0"), val = bool(false)]; bool attn_weights_161_transpose_y_0 = const()[name = string("attn_weights_161_transpose_y_0"), val = bool(false)]; tensor attn_weights_161_cast_fp16 = matmul(transpose_x = attn_weights_161_transpose_x_0, transpose_y = attn_weights_161_transpose_y_0, x = var_3178_cast_fp16_0, y = var_3191_cast_fp16_0)[name = string("attn_weights_161_cast_fp16")]; fp16 _inversed_attn_weights_163_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_163_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_163_cast_fp16 = mul(x = attn_weights_161_cast_fp16, y = _inversed_attn_weights_163_y_0_to_fp16)[name = string("_inversed_attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = _inversed_attn_weights_163_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_165_cast_fp16")]; int32 var_3198 = const()[name = string("op_3198"), val = int32(2)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_3198, x = attn_weights_165_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool var_3204_transpose_x_1 = const()[name = string("op_3204_transpose_x_1"), val = bool(true)]; bool var_3204_transpose_y_1 = const()[name = string("op_3204_transpose_y_1"), val = bool(false)]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_1, transpose_y = var_3204_transpose_y_1, x = attn_weights_167_cast_fp16, y = var_3188_cast_fp16_0)[name = string("op_3204_cast_fp16")]; bool attn_weights_169_transpose_x_0 = const()[name = string("attn_weights_169_transpose_x_0"), val = bool(false)]; bool attn_weights_169_transpose_y_0 = const()[name = string("attn_weights_169_transpose_y_0"), val = bool(false)]; tensor attn_weights_169_cast_fp16 = matmul(transpose_x = attn_weights_169_transpose_x_0, transpose_y = attn_weights_169_transpose_y_0, x = var_3178_cast_fp16_1, y = var_3191_cast_fp16_1)[name = string("attn_weights_169_cast_fp16")]; fp16 _inversed_attn_weights_171_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_171_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_171_cast_fp16 = mul(x = attn_weights_169_cast_fp16, y = _inversed_attn_weights_171_y_0_to_fp16)[name = string("_inversed_attn_weights_171_cast_fp16")]; tensor attn_weights_173_cast_fp16 = add(x = _inversed_attn_weights_171_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_173_cast_fp16")]; int32 var_3210 = const()[name = string("op_3210"), val = int32(2)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_3210, x = attn_weights_173_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(true)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_175_cast_fp16, y = var_3188_cast_fp16_1)[name = string("attn_output_61_cast_fp16")]; int32 var_3218 = const()[name = string("op_3218"), val = int32(1)]; bool attn_output_63_interleave_0 = const()[name = string("attn_output_63_interleave_0"), val = bool(false)]; tensor attn_output_63_cast_fp16 = concat(axis = var_3218, interleave = attn_output_63_interleave_0, values = (var_3204_cast_fp16, attn_output_61_cast_fp16))[name = string("attn_output_63_cast_fp16")]; tensor var_3222_perm_0 = const()[name = string("op_3222_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([1, 1024, 1, 32])]; tensor var_3222_cast_fp16 = transpose(perm = var_3222_perm_0, x = attn_output_63_cast_fp16)[name = string("transpose_39")]; tensor x_185_cast_fp16 = reshape(shape = var_3227, x = var_3222_cast_fp16)[name = string("x_185_cast_fp16")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309958848)))]; tensor hidden_states_63_cast_fp16 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = var_3234_to_fp16, x = x_185_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_179_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(1)]; fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3249_cast_fp16 = mul(x = x_187_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_3249_cast_fp16")]; bool x_189_interleave_0 = const()[name = string("x_189_interleave_0"), val = bool(false)]; tensor x_189_cast_fp16 = concat(axis = var_3246, interleave = x_189_interleave_0, values = (x_187_cast_fp16, var_3249_cast_fp16))[name = string("x_189_cast_fp16")]; tensor out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor([1])]; fp16 var_3259_to_fp16 = const()[name = string("op_3259_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_3259_to_fp16, x = x_189_cast_fp16)[name = string("out_127_cast_fp16")]; tensor layer_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312056064)))]; tensor out_129_cast_fp16 = mul(x = out_127_cast_fp16, y = layer_layers_10_post_attention_layernorm_weight_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_3265_split_sizes_0 = const()[name = string("op_3265_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3265_axis_0 = const()[name = string("op_3265_axis_0"), val = int32(1)]; tensor var_3265_cast_fp16_0, tensor var_3265_cast_fp16_1 = split(axis = var_3265_axis_0, split_sizes = var_3265_split_sizes_0, x = out_129_cast_fp16)[name = string("op_3265_cast_fp16")]; string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312060224)))]; tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = var_3270_to_fp16, x = var_3265_cast_fp16_0)[name = string("input_21_cast_fp16")]; tensor var_3281_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_3281_cast_fp16")]; string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")]; tensor var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor([1, 1])]; tensor var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor([1, 1])]; int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)]; tensor var_3269_to_fp16 = const()[name = string("op_3269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320448896)))]; tensor var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = var_3269_to_fp16, x = var_3265_cast_fp16_0)[name = string("op_3286_cast_fp16")]; tensor x_195_cast_fp16 = mul(x = var_3281_cast_fp16, y = var_3286_cast_fp16)[name = string("x_195_cast_fp16")]; string hidden_states_65_pad_type_0 = const()[name = string("hidden_states_65_pad_type_0"), val = string("valid")]; tensor hidden_states_65_strides_0 = const()[name = string("hidden_states_65_strides_0"), val = tensor([1, 1])]; tensor hidden_states_65_pad_0 = const()[name = string("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_65_dilations_0 = const()[name = string("hidden_states_65_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_65_groups_0 = const()[name = string("hidden_states_65_groups_0"), val = int32(1)]; tensor var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328837568)))]; tensor hidden_states_65_cast_fp16 = conv(dilations = hidden_states_65_dilations_0, groups = hidden_states_65_groups_0, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = hidden_states_65_strides_0, weight = var_3268_to_fp16, x = x_195_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor x_197_cast_fp16 = add(x = x_187_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("x_197_cast_fp16")]; int32 var_3299 = const()[name = string("op_3299"), val = int32(1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3302_cast_fp16 = mul(x = x_197_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3302_cast_fp16")]; bool x_199_interleave_0 = const()[name = string("x_199_interleave_0"), val = bool(false)]; tensor x_199_cast_fp16 = concat(axis = var_3299, interleave = x_199_interleave_0, values = (x_197_cast_fp16, var_3302_cast_fp16))[name = string("x_199_cast_fp16")]; tensor out_133_axes_0 = const()[name = string("out_133_axes_0"), val = tensor([1])]; fp16 var_3312_to_fp16 = const()[name = string("op_3312_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_3312_to_fp16, x = x_199_cast_fp16)[name = string("out_133_cast_fp16")]; tensor layer_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337226240)))]; tensor out_135_cast_fp16 = mul(x = out_133_cast_fp16, y = layer_layers_11_input_layernorm_weight_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_3318_split_sizes_0 = const()[name = string("op_3318_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3318_axis_0 = const()[name = string("op_3318_axis_0"), val = int32(1)]; tensor var_3318_cast_fp16_0, tensor var_3318_cast_fp16_1 = split(axis = var_3318_axis_0, split_sizes = var_3318_split_sizes_0, x = out_135_cast_fp16)[name = string("op_3318_cast_fp16")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337230400)))]; tensor query_states_45_cast_fp16 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = var_3340_to_fp16, x = var_3318_cast_fp16_0)[name = string("query_states_45_cast_fp16")]; string key_states_45_pad_type_0 = const()[name = string("key_states_45_pad_type_0"), val = string("valid")]; tensor key_states_45_strides_0 = const()[name = string("key_states_45_strides_0"), val = tensor([1, 1])]; tensor key_states_45_pad_0 = const()[name = string("key_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_45_dilations_0 = const()[name = string("key_states_45_dilations_0"), val = tensor([1, 1])]; int32 key_states_45_groups_0 = const()[name = string("key_states_45_groups_0"), val = int32(1)]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339327616)))]; tensor key_states_45_cast_fp16 = conv(dilations = key_states_45_dilations_0, groups = key_states_45_groups_0, pad = key_states_45_pad_0, pad_type = key_states_45_pad_type_0, strides = key_states_45_strides_0, weight = var_3351_to_fp16, x = var_3318_cast_fp16_0)[name = string("key_states_45_cast_fp16")]; string value_states_45_pad_type_0 = const()[name = string("value_states_45_pad_type_0"), val = string("valid")]; tensor value_states_45_strides_0 = const()[name = string("value_states_45_strides_0"), val = tensor([1, 1])]; tensor value_states_45_pad_0 = const()[name = string("value_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_45_dilations_0 = const()[name = string("value_states_45_dilations_0"), val = tensor([1, 1])]; int32 value_states_45_groups_0 = const()[name = string("value_states_45_groups_0"), val = int32(1)]; tensor var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339589824)))]; tensor value_states_45_cast_fp16 = conv(dilations = value_states_45_dilations_0, groups = value_states_45_groups_0, pad = value_states_45_pad_0, pad_type = value_states_45_pad_type_0, strides = value_states_45_strides_0, weight = var_3362_to_fp16, x = var_3318_cast_fp16_0)[name = string("value_states_45_cast_fp16")]; tensor var_3370 = const()[name = string("op_3370"), val = tensor([1, 16, 64, 32])]; tensor embed_45_cast_fp16 = reshape(shape = var_3370, x = query_states_45_cast_fp16)[name = string("embed_45_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 2, 64, 32])]; tensor var_3375_cast_fp16 = reshape(shape = var_3374, x = key_states_45_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor embed_47_perm_0 = const()[name = string("embed_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 64, 32])]; tensor var_3382_cast_fp16 = reshape(shape = var_3381, x = value_states_45_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor value_states_47_perm_0 = const()[name = string("value_states_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3386_cast_fp16 = mul(x = embed_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3386_cast_fp16")]; tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([32, 32])]; int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-2)]; tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = embed_45_cast_fp16)[name = string("op_3387_cast_fp16")]; fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3389_cast_fp16 = mul(x = var_3387_cast_fp16_1, y = const_117_promoted_to_fp16)[name = string("op_3389_cast_fp16")]; int32 var_3391 = const()[name = string("op_3391"), val = int32(-2)]; bool var_3392_interleave_0 = const()[name = string("op_3392_interleave_0"), val = bool(false)]; tensor var_3392_cast_fp16 = concat(axis = var_3391, interleave = var_3392_interleave_0, values = (var_3389_cast_fp16, var_3387_cast_fp16_0))[name = string("op_3392_cast_fp16")]; tensor var_3393_cast_fp16 = mul(x = var_3392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3393_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_3386_cast_fp16, y = var_3393_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor embed_47_cast_fp16 = transpose(perm = embed_47_perm_0, x = var_3375_cast_fp16)[name = string("transpose_38")]; tensor var_3396_cast_fp16 = mul(x = embed_47_cast_fp16, y = cos_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397_split_sizes_0 = const()[name = string("op_3397_split_sizes_0"), val = tensor([32, 32])]; int32 var_3397_axis_0 = const()[name = string("op_3397_axis_0"), val = int32(-1)]; tensor var_3397_cast_fp16_0, tensor var_3397_cast_fp16_1 = split(axis = var_3397_axis_0, split_sizes = var_3397_split_sizes_0, x = embed_47_cast_fp16)[name = string("op_3397_cast_fp16")]; fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3399_cast_fp16 = mul(x = var_3397_cast_fp16_1, y = const_118_promoted_to_fp16)[name = string("op_3399_cast_fp16")]; int32 var_3401 = const()[name = string("op_3401"), val = int32(-1)]; bool var_3402_interleave_0 = const()[name = string("op_3402_interleave_0"), val = bool(false)]; tensor var_3402_cast_fp16 = concat(axis = var_3401, interleave = var_3402_interleave_0, values = (var_3399_cast_fp16, var_3397_cast_fp16_0))[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = mul(x = var_3402_cast_fp16, y = sin_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor key_states_47_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3403_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([11])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([12])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_111, expand_dims_112, position_id, concat_91_values3_0))[name = string("concat_91")]; tensor concat_92_values1_0 = const()[name = string("concat_92_values1_0"), val = tensor([0])]; tensor concat_92_values3_0 = const()[name = string("concat_92_values3_0"), val = tensor([0])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_114, concat_92_values1_0, var_426, concat_92_values3_0))[name = string("concat_92")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = key_states_47_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_332_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_332")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_47_cast_fp16 = transpose(perm = value_states_47_perm_0, x = var_3382_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = value_states_47_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_333_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_333")]; tensor var_3446_begin_0 = const()[name = string("op_3446_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3446_end_0 = const()[name = string("op_3446_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3446_end_mask_0 = const()[name = string("op_3446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3446_cast_fp16 = slice_by_index(begin = var_3446_begin_0, end = var_3446_end_0, end_mask = var_3446_end_mask_0, x = coreml_update_state_70)[name = string("op_3446_cast_fp16")]; tensor tile_22 = const()[name = string("tile_22"), val = tensor([1, 1])]; int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(1)]; tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = tile_22, x = var_3446_cast_fp16)[name = string("op_3449_cast_fp16")]; tensor var_3456_begin_0 = const()[name = string("op_3456_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3456_end_0 = const()[name = string("op_3456_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3456_end_mask_0 = const()[name = string("op_3456_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = coreml_update_state_71)[name = string("op_3456_cast_fp16")]; tensor tile_23 = const()[name = string("tile_23"), val = tensor([1, 1])]; int32 var_3459_axis_0 = const()[name = string("op_3459_axis_0"), val = int32(1)]; tensor var_3459_cast_fp16_0, tensor var_3459_cast_fp16_1 = split(axis = var_3459_axis_0, split_sizes = tile_23, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3462_split_sizes_0 = const()[name = string("op_3462_split_sizes_0"), val = tensor([8, 8])]; int32 var_3462_axis_0 = const()[name = string("op_3462_axis_0"), val = int32(1)]; tensor var_3462_cast_fp16_0, tensor var_3462_cast_fp16_1 = split(axis = var_3462_axis_0, split_sizes = var_3462_split_sizes_0, x = query_states_47_cast_fp16)[name = string("op_3462_cast_fp16")]; bool attn_weights_177_transpose_x_0 = const()[name = string("attn_weights_177_transpose_x_0"), val = bool(false)]; bool attn_weights_177_transpose_y_0 = const()[name = string("attn_weights_177_transpose_y_0"), val = bool(false)]; tensor attn_weights_177_cast_fp16 = matmul(transpose_x = attn_weights_177_transpose_x_0, transpose_y = attn_weights_177_transpose_y_0, x = var_3449_cast_fp16_0, y = var_3462_cast_fp16_0)[name = string("attn_weights_177_cast_fp16")]; fp16 _inversed_attn_weights_179_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_179_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_179_cast_fp16 = mul(x = attn_weights_177_cast_fp16, y = _inversed_attn_weights_179_y_0_to_fp16)[name = string("_inversed_attn_weights_179_cast_fp16")]; tensor attn_weights_181_cast_fp16 = add(x = _inversed_attn_weights_179_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_181_cast_fp16")]; int32 var_3469 = const()[name = string("op_3469"), val = int32(2)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_3469, x = attn_weights_181_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool var_3475_transpose_x_1 = const()[name = string("op_3475_transpose_x_1"), val = bool(true)]; bool var_3475_transpose_y_1 = const()[name = string("op_3475_transpose_y_1"), val = bool(false)]; tensor var_3475_cast_fp16 = matmul(transpose_x = var_3475_transpose_x_1, transpose_y = var_3475_transpose_y_1, x = attn_weights_183_cast_fp16, y = var_3459_cast_fp16_0)[name = string("op_3475_cast_fp16")]; bool attn_weights_185_transpose_x_0 = const()[name = string("attn_weights_185_transpose_x_0"), val = bool(false)]; bool attn_weights_185_transpose_y_0 = const()[name = string("attn_weights_185_transpose_y_0"), val = bool(false)]; tensor attn_weights_185_cast_fp16 = matmul(transpose_x = attn_weights_185_transpose_x_0, transpose_y = attn_weights_185_transpose_y_0, x = var_3449_cast_fp16_1, y = var_3462_cast_fp16_1)[name = string("attn_weights_185_cast_fp16")]; fp16 _inversed_attn_weights_187_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_187_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_187_cast_fp16 = mul(x = attn_weights_185_cast_fp16, y = _inversed_attn_weights_187_y_0_to_fp16)[name = string("_inversed_attn_weights_187_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = _inversed_attn_weights_187_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_189_cast_fp16")]; int32 var_3481 = const()[name = string("op_3481"), val = int32(2)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_3481, x = attn_weights_189_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_67_transpose_x_1 = const()[name = string("attn_output_67_transpose_x_1"), val = bool(true)]; bool attn_output_67_transpose_y_1 = const()[name = string("attn_output_67_transpose_y_1"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_1, transpose_y = attn_output_67_transpose_y_1, x = attn_weights_191_cast_fp16, y = var_3459_cast_fp16_1)[name = string("attn_output_67_cast_fp16")]; int32 var_3489 = const()[name = string("op_3489"), val = int32(1)]; bool attn_output_69_interleave_0 = const()[name = string("attn_output_69_interleave_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = concat(axis = var_3489, interleave = attn_output_69_interleave_0, values = (var_3475_cast_fp16, attn_output_67_cast_fp16))[name = string("attn_output_69_cast_fp16")]; tensor var_3493_perm_0 = const()[name = string("op_3493_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, 1024, 1, 32])]; tensor var_3493_cast_fp16 = transpose(perm = var_3493_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_36")]; tensor x_203_cast_fp16 = reshape(shape = var_3498, x = var_3493_cast_fp16)[name = string("x_203_cast_fp16")]; string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339852032)))]; tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = var_3505_to_fp16, x = x_203_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor x_205_cast_fp16 = add(x = x_197_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("x_205_cast_fp16")]; int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; fp16 const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3520_cast_fp16 = mul(x = x_205_cast_fp16, y = const_123_promoted_to_fp16)[name = string("op_3520_cast_fp16")]; bool x_207_interleave_0 = const()[name = string("x_207_interleave_0"), val = bool(false)]; tensor x_207_cast_fp16 = concat(axis = var_3517, interleave = x_207_interleave_0, values = (x_205_cast_fp16, var_3520_cast_fp16))[name = string("x_207_cast_fp16")]; tensor out_139_axes_0 = const()[name = string("out_139_axes_0"), val = tensor([1])]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_3530_to_fp16, x = x_207_cast_fp16)[name = string("out_139_cast_fp16")]; tensor layer_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341949248)))]; tensor out_141_cast_fp16 = mul(x = out_139_cast_fp16, y = layer_layers_11_post_attention_layernorm_weight_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_3536_split_sizes_0 = const()[name = string("op_3536_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3536_axis_0 = const()[name = string("op_3536_axis_0"), val = int32(1)]; tensor var_3536_cast_fp16_0, tensor var_3536_cast_fp16_1 = split(axis = var_3536_axis_0, split_sizes = var_3536_split_sizes_0, x = out_141_cast_fp16)[name = string("op_3536_cast_fp16")]; string input_23_pad_type_0 = const()[name = string("input_23_pad_type_0"), val = string("valid")]; tensor input_23_strides_0 = const()[name = string("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = string("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = string("input_23_dilations_0"), val = tensor([1, 1])]; int32 input_23_groups_0 = const()[name = string("input_23_groups_0"), val = int32(1)]; tensor var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341953408)))]; tensor input_23_cast_fp16 = conv(dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = var_3541_to_fp16, x = var_3536_cast_fp16_0)[name = string("input_23_cast_fp16")]; tensor var_3552_cast_fp16 = silu(x = input_23_cast_fp16)[name = string("op_3552_cast_fp16")]; string var_3557_pad_type_0 = const()[name = string("op_3557_pad_type_0"), val = string("valid")]; tensor var_3557_strides_0 = const()[name = string("op_3557_strides_0"), val = tensor([1, 1])]; tensor var_3557_pad_0 = const()[name = string("op_3557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_dilations_0 = const()[name = string("op_3557_dilations_0"), val = tensor([1, 1])]; int32 var_3557_groups_0 = const()[name = string("op_3557_groups_0"), val = int32(1)]; tensor var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350342080)))]; tensor var_3557_cast_fp16 = conv(dilations = var_3557_dilations_0, groups = var_3557_groups_0, pad = var_3557_pad_0, pad_type = var_3557_pad_type_0, strides = var_3557_strides_0, weight = var_3540_to_fp16, x = var_3536_cast_fp16_0)[name = string("op_3557_cast_fp16")]; tensor x_213_cast_fp16 = mul(x = var_3552_cast_fp16, y = var_3557_cast_fp16)[name = string("x_213_cast_fp16")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor var_3539_to_fp16 = const()[name = string("op_3539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358730752)))]; tensor hidden_states_71_cast_fp16 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = var_3539_to_fp16, x = x_213_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor x_215_cast_fp16 = add(x = x_205_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("x_215_cast_fp16")]; int32 var_3570 = const()[name = string("op_3570"), val = int32(1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3573_cast_fp16 = mul(x = x_215_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_3573_cast_fp16")]; bool x_217_interleave_0 = const()[name = string("x_217_interleave_0"), val = bool(false)]; tensor x_217_cast_fp16 = concat(axis = var_3570, interleave = x_217_interleave_0, values = (x_215_cast_fp16, var_3573_cast_fp16))[name = string("x_217_cast_fp16")]; tensor out_145_axes_0 = const()[name = string("out_145_axes_0"), val = tensor([1])]; fp16 var_3583_to_fp16 = const()[name = string("op_3583_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_145_cast_fp16 = layer_norm(axes = out_145_axes_0, epsilon = var_3583_to_fp16, x = x_217_cast_fp16)[name = string("out_145_cast_fp16")]; tensor layer_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367119424)))]; tensor out_147_cast_fp16 = mul(x = out_145_cast_fp16, y = layer_layers_12_input_layernorm_weight_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_3589_split_sizes_0 = const()[name = string("op_3589_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3589_axis_0 = const()[name = string("op_3589_axis_0"), val = int32(1)]; tensor var_3589_cast_fp16_0, tensor var_3589_cast_fp16_1 = split(axis = var_3589_axis_0, split_sizes = var_3589_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3589_cast_fp16")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor var_3611_to_fp16 = const()[name = string("op_3611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367123584)))]; tensor query_states_49_cast_fp16 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = var_3611_to_fp16, x = var_3589_cast_fp16_0)[name = string("query_states_49_cast_fp16")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor var_3622_to_fp16 = const()[name = string("op_3622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369220800)))]; tensor key_states_49_cast_fp16 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = var_3622_to_fp16, x = var_3589_cast_fp16_0)[name = string("key_states_49_cast_fp16")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor var_3633_to_fp16 = const()[name = string("op_3633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369483008)))]; tensor value_states_49_cast_fp16 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = var_3633_to_fp16, x = var_3589_cast_fp16_0)[name = string("value_states_49_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, 16, 64, 32])]; tensor embed_49_cast_fp16 = reshape(shape = var_3641, x = query_states_49_cast_fp16)[name = string("embed_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 2, 64, 32])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = key_states_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor embed_51_perm_0 = const()[name = string("embed_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3652 = const()[name = string("op_3652"), val = tensor([1, 2, 64, 32])]; tensor var_3653_cast_fp16 = reshape(shape = var_3652, x = value_states_49_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor value_states_51_perm_0 = const()[name = string("value_states_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3657_cast_fp16 = mul(x = embed_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3657_cast_fp16")]; tensor var_3658_split_sizes_0 = const()[name = string("op_3658_split_sizes_0"), val = tensor([32, 32])]; int32 var_3658_axis_0 = const()[name = string("op_3658_axis_0"), val = int32(-2)]; tensor var_3658_cast_fp16_0, tensor var_3658_cast_fp16_1 = split(axis = var_3658_axis_0, split_sizes = var_3658_split_sizes_0, x = embed_49_cast_fp16)[name = string("op_3658_cast_fp16")]; fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3660_cast_fp16 = mul(x = var_3658_cast_fp16_1, y = const_127_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; int32 var_3662 = const()[name = string("op_3662"), val = int32(-2)]; bool var_3663_interleave_0 = const()[name = string("op_3663_interleave_0"), val = bool(false)]; tensor var_3663_cast_fp16 = concat(axis = var_3662, interleave = var_3663_interleave_0, values = (var_3660_cast_fp16, var_3658_cast_fp16_0))[name = string("op_3663_cast_fp16")]; tensor var_3664_cast_fp16 = mul(x = var_3663_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_3657_cast_fp16, y = var_3664_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor embed_51_cast_fp16 = transpose(perm = embed_51_perm_0, x = var_3646_cast_fp16)[name = string("transpose_35")]; tensor var_3667_cast_fp16 = mul(x = embed_51_cast_fp16, y = cos_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor var_3668_split_sizes_0 = const()[name = string("op_3668_split_sizes_0"), val = tensor([32, 32])]; int32 var_3668_axis_0 = const()[name = string("op_3668_axis_0"), val = int32(-1)]; tensor var_3668_cast_fp16_0, tensor var_3668_cast_fp16_1 = split(axis = var_3668_axis_0, split_sizes = var_3668_split_sizes_0, x = embed_51_cast_fp16)[name = string("op_3668_cast_fp16")]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3670_cast_fp16 = mul(x = var_3668_cast_fp16_1, y = const_128_promoted_to_fp16)[name = string("op_3670_cast_fp16")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; bool var_3673_interleave_0 = const()[name = string("op_3673_interleave_0"), val = bool(false)]; tensor var_3673_cast_fp16 = concat(axis = var_3672, interleave = var_3673_interleave_0, values = (var_3670_cast_fp16, var_3668_cast_fp16_0))[name = string("op_3673_cast_fp16")]; tensor var_3674_cast_fp16 = mul(x = var_3673_cast_fp16, y = sin_cast_fp16)[name = string("op_3674_cast_fp16")]; tensor key_states_51_cast_fp16 = add(x = var_3667_cast_fp16, y = var_3674_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([12])]; tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([13])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_121, expand_dims_122, position_id, concat_99_values3_0))[name = string("concat_99")]; tensor concat_100_values1_0 = const()[name = string("concat_100_values1_0"), val = tensor([0])]; tensor concat_100_values3_0 = const()[name = string("concat_100_values3_0"), val = tensor([0])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (expand_dims_124, concat_100_values1_0, var_426, concat_100_values3_0))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = key_states_51_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_334_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_334")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51_cast_fp16 = transpose(perm = value_states_51_perm_0, x = var_3653_cast_fp16)[name = string("transpose_34")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = value_states_51_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_335_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_335")]; tensor var_3717_begin_0 = const()[name = string("op_3717_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3717_end_0 = const()[name = string("op_3717_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3717_end_mask_0 = const()[name = string("op_3717_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = coreml_update_state_72)[name = string("op_3717_cast_fp16")]; tensor tile_24 = const()[name = string("tile_24"), val = tensor([1, 1])]; int32 var_3720_axis_0 = const()[name = string("op_3720_axis_0"), val = int32(1)]; tensor var_3720_cast_fp16_0, tensor var_3720_cast_fp16_1 = split(axis = var_3720_axis_0, split_sizes = tile_24, x = var_3717_cast_fp16)[name = string("op_3720_cast_fp16")]; tensor var_3727_begin_0 = const()[name = string("op_3727_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3727_end_0 = const()[name = string("op_3727_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3727_end_mask_0 = const()[name = string("op_3727_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = coreml_update_state_73)[name = string("op_3727_cast_fp16")]; tensor tile_25 = const()[name = string("tile_25"), val = tensor([1, 1])]; int32 var_3730_axis_0 = const()[name = string("op_3730_axis_0"), val = int32(1)]; tensor var_3730_cast_fp16_0, tensor var_3730_cast_fp16_1 = split(axis = var_3730_axis_0, split_sizes = tile_25, x = var_3727_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor var_3733_split_sizes_0 = const()[name = string("op_3733_split_sizes_0"), val = tensor([8, 8])]; int32 var_3733_axis_0 = const()[name = string("op_3733_axis_0"), val = int32(1)]; tensor var_3733_cast_fp16_0, tensor var_3733_cast_fp16_1 = split(axis = var_3733_axis_0, split_sizes = var_3733_split_sizes_0, x = query_states_51_cast_fp16)[name = string("op_3733_cast_fp16")]; bool attn_weights_193_transpose_x_0 = const()[name = string("attn_weights_193_transpose_x_0"), val = bool(false)]; bool attn_weights_193_transpose_y_0 = const()[name = string("attn_weights_193_transpose_y_0"), val = bool(false)]; tensor attn_weights_193_cast_fp16 = matmul(transpose_x = attn_weights_193_transpose_x_0, transpose_y = attn_weights_193_transpose_y_0, x = var_3720_cast_fp16_0, y = var_3733_cast_fp16_0)[name = string("attn_weights_193_cast_fp16")]; fp16 _inversed_attn_weights_195_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_195_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_195_cast_fp16 = mul(x = attn_weights_193_cast_fp16, y = _inversed_attn_weights_195_y_0_to_fp16)[name = string("_inversed_attn_weights_195_cast_fp16")]; tensor attn_weights_197_cast_fp16 = add(x = _inversed_attn_weights_195_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_197_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(2)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_3740, x = attn_weights_197_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool var_3746_transpose_x_1 = const()[name = string("op_3746_transpose_x_1"), val = bool(true)]; bool var_3746_transpose_y_1 = const()[name = string("op_3746_transpose_y_1"), val = bool(false)]; tensor var_3746_cast_fp16 = matmul(transpose_x = var_3746_transpose_x_1, transpose_y = var_3746_transpose_y_1, x = attn_weights_199_cast_fp16, y = var_3730_cast_fp16_0)[name = string("op_3746_cast_fp16")]; bool attn_weights_201_transpose_x_0 = const()[name = string("attn_weights_201_transpose_x_0"), val = bool(false)]; bool attn_weights_201_transpose_y_0 = const()[name = string("attn_weights_201_transpose_y_0"), val = bool(false)]; tensor attn_weights_201_cast_fp16 = matmul(transpose_x = attn_weights_201_transpose_x_0, transpose_y = attn_weights_201_transpose_y_0, x = var_3720_cast_fp16_1, y = var_3733_cast_fp16_1)[name = string("attn_weights_201_cast_fp16")]; fp16 _inversed_attn_weights_203_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_203_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_203_cast_fp16 = mul(x = attn_weights_201_cast_fp16, y = _inversed_attn_weights_203_y_0_to_fp16)[name = string("_inversed_attn_weights_203_cast_fp16")]; tensor attn_weights_205_cast_fp16 = add(x = _inversed_attn_weights_203_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_205_cast_fp16")]; int32 var_3752 = const()[name = string("op_3752"), val = int32(2)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_3752, x = attn_weights_205_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(true)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_207_cast_fp16, y = var_3730_cast_fp16_1)[name = string("attn_output_73_cast_fp16")]; int32 var_3760 = const()[name = string("op_3760"), val = int32(1)]; bool attn_output_75_interleave_0 = const()[name = string("attn_output_75_interleave_0"), val = bool(false)]; tensor attn_output_75_cast_fp16 = concat(axis = var_3760, interleave = attn_output_75_interleave_0, values = (var_3746_cast_fp16, attn_output_73_cast_fp16))[name = string("attn_output_75_cast_fp16")]; tensor var_3764_perm_0 = const()[name = string("op_3764_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([1, 1024, 1, 32])]; tensor var_3764_cast_fp16 = transpose(perm = var_3764_perm_0, x = attn_output_75_cast_fp16)[name = string("transpose_33")]; tensor x_221_cast_fp16 = reshape(shape = var_3769, x = var_3764_cast_fp16)[name = string("x_221_cast_fp16")]; string hidden_states_75_pad_type_0 = const()[name = string("hidden_states_75_pad_type_0"), val = string("valid")]; tensor hidden_states_75_strides_0 = const()[name = string("hidden_states_75_strides_0"), val = tensor([1, 1])]; tensor hidden_states_75_pad_0 = const()[name = string("hidden_states_75_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_75_dilations_0 = const()[name = string("hidden_states_75_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_75_groups_0 = const()[name = string("hidden_states_75_groups_0"), val = int32(1)]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369745216)))]; tensor hidden_states_75_cast_fp16 = conv(dilations = hidden_states_75_dilations_0, groups = hidden_states_75_groups_0, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = hidden_states_75_strides_0, weight = var_3776_to_fp16, x = x_221_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor x_223_cast_fp16 = add(x = x_215_cast_fp16, y = hidden_states_75_cast_fp16)[name = string("x_223_cast_fp16")]; int32 var_3788 = const()[name = string("op_3788"), val = int32(1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3791_cast_fp16 = mul(x = x_223_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3791_cast_fp16")]; bool x_225_interleave_0 = const()[name = string("x_225_interleave_0"), val = bool(false)]; tensor x_225_cast_fp16 = concat(axis = var_3788, interleave = x_225_interleave_0, values = (x_223_cast_fp16, var_3791_cast_fp16))[name = string("x_225_cast_fp16")]; tensor out_151_axes_0 = const()[name = string("out_151_axes_0"), val = tensor([1])]; fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_151_cast_fp16 = layer_norm(axes = out_151_axes_0, epsilon = var_3801_to_fp16, x = x_225_cast_fp16)[name = string("out_151_cast_fp16")]; tensor layer_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371842432)))]; tensor out_153_cast_fp16 = mul(x = out_151_cast_fp16, y = layer_layers_12_post_attention_layernorm_weight_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3807_split_sizes_0 = const()[name = string("op_3807_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3807_axis_0 = const()[name = string("op_3807_axis_0"), val = int32(1)]; tensor var_3807_cast_fp16_0, tensor var_3807_cast_fp16_1 = split(axis = var_3807_axis_0, split_sizes = var_3807_split_sizes_0, x = out_153_cast_fp16)[name = string("op_3807_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371846592)))]; tensor input_25_cast_fp16 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = var_3812_to_fp16, x = var_3807_cast_fp16_0)[name = string("input_25_cast_fp16")]; tensor var_3823_cast_fp16 = silu(x = input_25_cast_fp16)[name = string("op_3823_cast_fp16")]; string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")]; tensor var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor([1, 1])]; tensor var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor([1, 1])]; int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)]; tensor var_3811_to_fp16 = const()[name = string("op_3811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380235264)))]; tensor var_3828_cast_fp16 = conv(dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = var_3811_to_fp16, x = var_3807_cast_fp16_0)[name = string("op_3828_cast_fp16")]; tensor x_231_cast_fp16 = mul(x = var_3823_cast_fp16, y = var_3828_cast_fp16)[name = string("x_231_cast_fp16")]; string hidden_states_77_pad_type_0 = const()[name = string("hidden_states_77_pad_type_0"), val = string("valid")]; tensor hidden_states_77_strides_0 = const()[name = string("hidden_states_77_strides_0"), val = tensor([1, 1])]; tensor hidden_states_77_pad_0 = const()[name = string("hidden_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_77_dilations_0 = const()[name = string("hidden_states_77_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_77_groups_0 = const()[name = string("hidden_states_77_groups_0"), val = int32(1)]; tensor var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388623936)))]; tensor hidden_states_77_cast_fp16 = conv(dilations = hidden_states_77_dilations_0, groups = hidden_states_77_groups_0, pad = hidden_states_77_pad_0, pad_type = hidden_states_77_pad_type_0, strides = hidden_states_77_strides_0, weight = var_3810_to_fp16, x = x_231_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_223_cast_fp16, y = hidden_states_77_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_3841 = const()[name = string("op_3841"), val = int32(1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x_233_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool x_235_interleave_0 = const()[name = string("x_235_interleave_0"), val = bool(false)]; tensor x_235_cast_fp16 = concat(axis = var_3841, interleave = x_235_interleave_0, values = (x_233_cast_fp16, var_3844_cast_fp16))[name = string("x_235_cast_fp16")]; tensor out_157_axes_0 = const()[name = string("out_157_axes_0"), val = tensor([1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_157_cast_fp16 = layer_norm(axes = out_157_axes_0, epsilon = var_3854_to_fp16, x = x_235_cast_fp16)[name = string("out_157_cast_fp16")]; tensor layer_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397012608)))]; tensor out_159_cast_fp16 = mul(x = out_157_cast_fp16, y = layer_layers_13_input_layernorm_weight_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_3860_split_sizes_0 = const()[name = string("op_3860_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3860_axis_0 = const()[name = string("op_3860_axis_0"), val = int32(1)]; tensor var_3860_cast_fp16_0, tensor var_3860_cast_fp16_1 = split(axis = var_3860_axis_0, split_sizes = var_3860_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3860_cast_fp16")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397016768)))]; tensor query_states_53_cast_fp16 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = var_3882_to_fp16, x = var_3860_cast_fp16_0)[name = string("query_states_53_cast_fp16")]; string key_states_53_pad_type_0 = const()[name = string("key_states_53_pad_type_0"), val = string("valid")]; tensor key_states_53_strides_0 = const()[name = string("key_states_53_strides_0"), val = tensor([1, 1])]; tensor key_states_53_pad_0 = const()[name = string("key_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_53_dilations_0 = const()[name = string("key_states_53_dilations_0"), val = tensor([1, 1])]; int32 key_states_53_groups_0 = const()[name = string("key_states_53_groups_0"), val = int32(1)]; tensor var_3893_to_fp16 = const()[name = string("op_3893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399113984)))]; tensor key_states_53_cast_fp16 = conv(dilations = key_states_53_dilations_0, groups = key_states_53_groups_0, pad = key_states_53_pad_0, pad_type = key_states_53_pad_type_0, strides = key_states_53_strides_0, weight = var_3893_to_fp16, x = var_3860_cast_fp16_0)[name = string("key_states_53_cast_fp16")]; string value_states_53_pad_type_0 = const()[name = string("value_states_53_pad_type_0"), val = string("valid")]; tensor value_states_53_strides_0 = const()[name = string("value_states_53_strides_0"), val = tensor([1, 1])]; tensor value_states_53_pad_0 = const()[name = string("value_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_53_dilations_0 = const()[name = string("value_states_53_dilations_0"), val = tensor([1, 1])]; int32 value_states_53_groups_0 = const()[name = string("value_states_53_groups_0"), val = int32(1)]; tensor var_3904_to_fp16 = const()[name = string("op_3904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399376192)))]; tensor value_states_53_cast_fp16 = conv(dilations = value_states_53_dilations_0, groups = value_states_53_groups_0, pad = value_states_53_pad_0, pad_type = value_states_53_pad_type_0, strides = value_states_53_strides_0, weight = var_3904_to_fp16, x = var_3860_cast_fp16_0)[name = string("value_states_53_cast_fp16")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([1, 16, 64, 32])]; tensor embed_53_cast_fp16 = reshape(shape = var_3912, x = query_states_53_cast_fp16)[name = string("embed_53_cast_fp16")]; tensor var_3916 = const()[name = string("op_3916"), val = tensor([1, 2, 64, 32])]; tensor var_3917_cast_fp16 = reshape(shape = var_3916, x = key_states_53_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor embed_55_perm_0 = const()[name = string("embed_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2, 64, 32])]; tensor var_3924_cast_fp16 = reshape(shape = var_3923, x = value_states_53_cast_fp16)[name = string("op_3924_cast_fp16")]; tensor value_states_55_perm_0 = const()[name = string("value_states_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3928_cast_fp16 = mul(x = embed_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor var_3929_split_sizes_0 = const()[name = string("op_3929_split_sizes_0"), val = tensor([32, 32])]; int32 var_3929_axis_0 = const()[name = string("op_3929_axis_0"), val = int32(-2)]; tensor var_3929_cast_fp16_0, tensor var_3929_cast_fp16_1 = split(axis = var_3929_axis_0, split_sizes = var_3929_split_sizes_0, x = embed_53_cast_fp16)[name = string("op_3929_cast_fp16")]; fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3931_cast_fp16 = mul(x = var_3929_cast_fp16_1, y = const_137_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; int32 var_3933 = const()[name = string("op_3933"), val = int32(-2)]; bool var_3934_interleave_0 = const()[name = string("op_3934_interleave_0"), val = bool(false)]; tensor var_3934_cast_fp16 = concat(axis = var_3933, interleave = var_3934_interleave_0, values = (var_3931_cast_fp16, var_3929_cast_fp16_0))[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = var_3934_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_3928_cast_fp16, y = var_3935_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor embed_55_cast_fp16 = transpose(perm = embed_55_perm_0, x = var_3917_cast_fp16)[name = string("transpose_32")]; tensor var_3938_cast_fp16 = mul(x = embed_55_cast_fp16, y = cos_cast_fp16)[name = string("op_3938_cast_fp16")]; tensor var_3939_split_sizes_0 = const()[name = string("op_3939_split_sizes_0"), val = tensor([32, 32])]; int32 var_3939_axis_0 = const()[name = string("op_3939_axis_0"), val = int32(-1)]; tensor var_3939_cast_fp16_0, tensor var_3939_cast_fp16_1 = split(axis = var_3939_axis_0, split_sizes = var_3939_split_sizes_0, x = embed_55_cast_fp16)[name = string("op_3939_cast_fp16")]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3941_cast_fp16 = mul(x = var_3939_cast_fp16_1, y = const_138_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; bool var_3944_interleave_0 = const()[name = string("op_3944_interleave_0"), val = bool(false)]; tensor var_3944_cast_fp16 = concat(axis = var_3943, interleave = var_3944_interleave_0, values = (var_3941_cast_fp16, var_3939_cast_fp16_0))[name = string("op_3944_cast_fp16")]; tensor var_3945_cast_fp16 = mul(x = var_3944_cast_fp16, y = sin_cast_fp16)[name = string("op_3945_cast_fp16")]; tensor key_states_55_cast_fp16 = add(x = var_3938_cast_fp16, y = var_3945_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor expand_dims_131 = const()[name = string("expand_dims_131"), val = tensor([13])]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_134 = const()[name = string("expand_dims_134"), val = tensor([14])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_131, expand_dims_132, position_id, concat_107_values3_0))[name = string("concat_107")]; tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (expand_dims_134, concat_108_values1_0, var_426, concat_108_values3_0))[name = string("concat_108")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = key_states_55_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_336_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_336")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_55_cast_fp16 = transpose(perm = value_states_55_perm_0, x = var_3924_cast_fp16)[name = string("transpose_31")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = value_states_55_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_337_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_337")]; tensor var_3988_begin_0 = const()[name = string("op_3988_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3988_end_0 = const()[name = string("op_3988_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3988_end_mask_0 = const()[name = string("op_3988_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = coreml_update_state_74)[name = string("op_3988_cast_fp16")]; tensor tile_26 = const()[name = string("tile_26"), val = tensor([1, 1])]; int32 var_3991_axis_0 = const()[name = string("op_3991_axis_0"), val = int32(1)]; tensor var_3991_cast_fp16_0, tensor var_3991_cast_fp16_1 = split(axis = var_3991_axis_0, split_sizes = tile_26, x = var_3988_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor var_3998_begin_0 = const()[name = string("op_3998_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3998_end_0 = const()[name = string("op_3998_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3998_end_mask_0 = const()[name = string("op_3998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3998_cast_fp16 = slice_by_index(begin = var_3998_begin_0, end = var_3998_end_0, end_mask = var_3998_end_mask_0, x = coreml_update_state_75)[name = string("op_3998_cast_fp16")]; tensor tile_27 = const()[name = string("tile_27"), val = tensor([1, 1])]; int32 var_4001_axis_0 = const()[name = string("op_4001_axis_0"), val = int32(1)]; tensor var_4001_cast_fp16_0, tensor var_4001_cast_fp16_1 = split(axis = var_4001_axis_0, split_sizes = tile_27, x = var_3998_cast_fp16)[name = string("op_4001_cast_fp16")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([8, 8])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(1)]; tensor var_4004_cast_fp16_0, tensor var_4004_cast_fp16_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = query_states_55_cast_fp16)[name = string("op_4004_cast_fp16")]; bool attn_weights_209_transpose_x_0 = const()[name = string("attn_weights_209_transpose_x_0"), val = bool(false)]; bool attn_weights_209_transpose_y_0 = const()[name = string("attn_weights_209_transpose_y_0"), val = bool(false)]; tensor attn_weights_209_cast_fp16 = matmul(transpose_x = attn_weights_209_transpose_x_0, transpose_y = attn_weights_209_transpose_y_0, x = var_3991_cast_fp16_0, y = var_4004_cast_fp16_0)[name = string("attn_weights_209_cast_fp16")]; fp16 _inversed_attn_weights_211_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_211_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_211_cast_fp16 = mul(x = attn_weights_209_cast_fp16, y = _inversed_attn_weights_211_y_0_to_fp16)[name = string("_inversed_attn_weights_211_cast_fp16")]; tensor attn_weights_213_cast_fp16 = add(x = _inversed_attn_weights_211_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_213_cast_fp16")]; int32 var_4011 = const()[name = string("op_4011"), val = int32(2)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_4011, x = attn_weights_213_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool var_4017_transpose_x_1 = const()[name = string("op_4017_transpose_x_1"), val = bool(true)]; bool var_4017_transpose_y_1 = const()[name = string("op_4017_transpose_y_1"), val = bool(false)]; tensor var_4017_cast_fp16 = matmul(transpose_x = var_4017_transpose_x_1, transpose_y = var_4017_transpose_y_1, x = attn_weights_215_cast_fp16, y = var_4001_cast_fp16_0)[name = string("op_4017_cast_fp16")]; bool attn_weights_217_transpose_x_0 = const()[name = string("attn_weights_217_transpose_x_0"), val = bool(false)]; bool attn_weights_217_transpose_y_0 = const()[name = string("attn_weights_217_transpose_y_0"), val = bool(false)]; tensor attn_weights_217_cast_fp16 = matmul(transpose_x = attn_weights_217_transpose_x_0, transpose_y = attn_weights_217_transpose_y_0, x = var_3991_cast_fp16_1, y = var_4004_cast_fp16_1)[name = string("attn_weights_217_cast_fp16")]; fp16 _inversed_attn_weights_219_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_219_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_219_cast_fp16 = mul(x = attn_weights_217_cast_fp16, y = _inversed_attn_weights_219_y_0_to_fp16)[name = string("_inversed_attn_weights_219_cast_fp16")]; tensor attn_weights_221_cast_fp16 = add(x = _inversed_attn_weights_219_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_221_cast_fp16")]; int32 var_4023 = const()[name = string("op_4023"), val = int32(2)]; tensor attn_weights_223_cast_fp16 = softmax(axis = var_4023, x = attn_weights_221_cast_fp16)[name = string("attn_weights_223_cast_fp16")]; bool attn_output_79_transpose_x_1 = const()[name = string("attn_output_79_transpose_x_1"), val = bool(true)]; bool attn_output_79_transpose_y_1 = const()[name = string("attn_output_79_transpose_y_1"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_1, transpose_y = attn_output_79_transpose_y_1, x = attn_weights_223_cast_fp16, y = var_4001_cast_fp16_1)[name = string("attn_output_79_cast_fp16")]; int32 var_4031 = const()[name = string("op_4031"), val = int32(1)]; bool attn_output_81_interleave_0 = const()[name = string("attn_output_81_interleave_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = concat(axis = var_4031, interleave = attn_output_81_interleave_0, values = (var_4017_cast_fp16, attn_output_79_cast_fp16))[name = string("attn_output_81_cast_fp16")]; tensor var_4035_perm_0 = const()[name = string("op_4035_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 1024, 1, 32])]; tensor var_4035_cast_fp16 = transpose(perm = var_4035_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_30")]; tensor x_239_cast_fp16 = reshape(shape = var_4040, x = var_4035_cast_fp16)[name = string("x_239_cast_fp16")]; string hidden_states_81_pad_type_0 = const()[name = string("hidden_states_81_pad_type_0"), val = string("valid")]; tensor hidden_states_81_strides_0 = const()[name = string("hidden_states_81_strides_0"), val = tensor([1, 1])]; tensor hidden_states_81_pad_0 = const()[name = string("hidden_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_81_dilations_0 = const()[name = string("hidden_states_81_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_81_groups_0 = const()[name = string("hidden_states_81_groups_0"), val = int32(1)]; tensor var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399638400)))]; tensor hidden_states_81_cast_fp16 = conv(dilations = hidden_states_81_dilations_0, groups = hidden_states_81_groups_0, pad = hidden_states_81_pad_0, pad_type = hidden_states_81_pad_type_0, strides = hidden_states_81_strides_0, weight = var_4047_to_fp16, x = x_239_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_81_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(1)]; fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4062_cast_fp16 = mul(x = x_241_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; bool x_243_interleave_0 = const()[name = string("x_243_interleave_0"), val = bool(false)]; tensor x_243_cast_fp16 = concat(axis = var_4059, interleave = x_243_interleave_0, values = (x_241_cast_fp16, var_4062_cast_fp16))[name = string("x_243_cast_fp16")]; tensor out_163_axes_0 = const()[name = string("out_163_axes_0"), val = tensor([1])]; fp16 var_4072_to_fp16 = const()[name = string("op_4072_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_163_cast_fp16 = layer_norm(axes = out_163_axes_0, epsilon = var_4072_to_fp16, x = x_243_cast_fp16)[name = string("out_163_cast_fp16")]; tensor layer_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401735616)))]; tensor out_165_cast_fp16 = mul(x = out_163_cast_fp16, y = layer_layers_13_post_attention_layernorm_weight_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_4078_split_sizes_0 = const()[name = string("op_4078_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4078_axis_0 = const()[name = string("op_4078_axis_0"), val = int32(1)]; tensor var_4078_cast_fp16_0, tensor var_4078_cast_fp16_1 = split(axis = var_4078_axis_0, split_sizes = var_4078_split_sizes_0, x = out_165_cast_fp16)[name = string("op_4078_cast_fp16")]; string input_27_pad_type_0 = const()[name = string("input_27_pad_type_0"), val = string("valid")]; tensor input_27_strides_0 = const()[name = string("input_27_strides_0"), val = tensor([1, 1])]; tensor input_27_pad_0 = const()[name = string("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_27_dilations_0 = const()[name = string("input_27_dilations_0"), val = tensor([1, 1])]; int32 input_27_groups_0 = const()[name = string("input_27_groups_0"), val = int32(1)]; tensor var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401739776)))]; tensor input_27_cast_fp16 = conv(dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = var_4083_to_fp16, x = var_4078_cast_fp16_0)[name = string("input_27_cast_fp16")]; tensor var_4094_cast_fp16 = silu(x = input_27_cast_fp16)[name = string("op_4094_cast_fp16")]; string var_4099_pad_type_0 = const()[name = string("op_4099_pad_type_0"), val = string("valid")]; tensor var_4099_strides_0 = const()[name = string("op_4099_strides_0"), val = tensor([1, 1])]; tensor var_4099_pad_0 = const()[name = string("op_4099_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4099_dilations_0 = const()[name = string("op_4099_dilations_0"), val = tensor([1, 1])]; int32 var_4099_groups_0 = const()[name = string("op_4099_groups_0"), val = int32(1)]; tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410128448)))]; tensor var_4099_cast_fp16 = conv(dilations = var_4099_dilations_0, groups = var_4099_groups_0, pad = var_4099_pad_0, pad_type = var_4099_pad_type_0, strides = var_4099_strides_0, weight = var_4082_to_fp16, x = var_4078_cast_fp16_0)[name = string("op_4099_cast_fp16")]; tensor x_249_cast_fp16 = mul(x = var_4094_cast_fp16, y = var_4099_cast_fp16)[name = string("x_249_cast_fp16")]; string hidden_states_83_pad_type_0 = const()[name = string("hidden_states_83_pad_type_0"), val = string("valid")]; tensor hidden_states_83_strides_0 = const()[name = string("hidden_states_83_strides_0"), val = tensor([1, 1])]; tensor hidden_states_83_pad_0 = const()[name = string("hidden_states_83_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_83_dilations_0 = const()[name = string("hidden_states_83_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_83_groups_0 = const()[name = string("hidden_states_83_groups_0"), val = int32(1)]; tensor var_4081_to_fp16 = const()[name = string("op_4081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418517120)))]; tensor hidden_states_83_cast_fp16 = conv(dilations = hidden_states_83_dilations_0, groups = hidden_states_83_groups_0, pad = hidden_states_83_pad_0, pad_type = hidden_states_83_pad_type_0, strides = hidden_states_83_strides_0, weight = var_4081_to_fp16, x = x_249_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_4112 = const()[name = string("op_4112"), val = int32(1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4115_cast_fp16 = mul(x = x_251_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4115_cast_fp16")]; bool x_253_interleave_0 = const()[name = string("x_253_interleave_0"), val = bool(false)]; tensor x_253_cast_fp16 = concat(axis = var_4112, interleave = x_253_interleave_0, values = (x_251_cast_fp16, var_4115_cast_fp16))[name = string("x_253_cast_fp16")]; tensor out_169_axes_0 = const()[name = string("out_169_axes_0"), val = tensor([1])]; fp16 var_4125_to_fp16 = const()[name = string("op_4125_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_169_cast_fp16 = layer_norm(axes = out_169_axes_0, epsilon = var_4125_to_fp16, x = x_253_cast_fp16)[name = string("out_169_cast_fp16")]; tensor layer_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426905792)))]; tensor out_171_cast_fp16 = mul(x = out_169_cast_fp16, y = layer_layers_14_input_layernorm_weight_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_4131_split_sizes_0 = const()[name = string("op_4131_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4131_axis_0 = const()[name = string("op_4131_axis_0"), val = int32(1)]; tensor var_4131_cast_fp16_0, tensor var_4131_cast_fp16_1 = split(axis = var_4131_axis_0, split_sizes = var_4131_split_sizes_0, x = out_171_cast_fp16)[name = string("op_4131_cast_fp16")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426909952)))]; tensor query_states_57_cast_fp16 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = var_4153_to_fp16, x = var_4131_cast_fp16_0)[name = string("query_states_57_cast_fp16")]; string key_states_57_pad_type_0 = const()[name = string("key_states_57_pad_type_0"), val = string("valid")]; tensor key_states_57_strides_0 = const()[name = string("key_states_57_strides_0"), val = tensor([1, 1])]; tensor key_states_57_pad_0 = const()[name = string("key_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_57_dilations_0 = const()[name = string("key_states_57_dilations_0"), val = tensor([1, 1])]; int32 key_states_57_groups_0 = const()[name = string("key_states_57_groups_0"), val = int32(1)]; tensor var_4164_to_fp16 = const()[name = string("op_4164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429007168)))]; tensor key_states_57_cast_fp16 = conv(dilations = key_states_57_dilations_0, groups = key_states_57_groups_0, pad = key_states_57_pad_0, pad_type = key_states_57_pad_type_0, strides = key_states_57_strides_0, weight = var_4164_to_fp16, x = var_4131_cast_fp16_0)[name = string("key_states_57_cast_fp16")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor var_4175_to_fp16 = const()[name = string("op_4175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429269376)))]; tensor value_states_57_cast_fp16 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = var_4175_to_fp16, x = var_4131_cast_fp16_0)[name = string("value_states_57_cast_fp16")]; tensor var_4183 = const()[name = string("op_4183"), val = tensor([1, 16, 64, 32])]; tensor embed_57_cast_fp16 = reshape(shape = var_4183, x = query_states_57_cast_fp16)[name = string("embed_57_cast_fp16")]; tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 2, 64, 32])]; tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = key_states_57_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor embed_59_perm_0 = const()[name = string("embed_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4194 = const()[name = string("op_4194"), val = tensor([1, 2, 64, 32])]; tensor var_4195_cast_fp16 = reshape(shape = var_4194, x = value_states_57_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor value_states_59_perm_0 = const()[name = string("value_states_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4199_cast_fp16 = mul(x = embed_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor var_4200_split_sizes_0 = const()[name = string("op_4200_split_sizes_0"), val = tensor([32, 32])]; int32 var_4200_axis_0 = const()[name = string("op_4200_axis_0"), val = int32(-2)]; tensor var_4200_cast_fp16_0, tensor var_4200_cast_fp16_1 = split(axis = var_4200_axis_0, split_sizes = var_4200_split_sizes_0, x = embed_57_cast_fp16)[name = string("op_4200_cast_fp16")]; fp16 const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4202_cast_fp16 = mul(x = var_4200_cast_fp16_1, y = const_147_promoted_to_fp16)[name = string("op_4202_cast_fp16")]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-2)]; bool var_4205_interleave_0 = const()[name = string("op_4205_interleave_0"), val = bool(false)]; tensor var_4205_cast_fp16 = concat(axis = var_4204, interleave = var_4205_interleave_0, values = (var_4202_cast_fp16, var_4200_cast_fp16_0))[name = string("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = mul(x = var_4205_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4206_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4206_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor embed_59_cast_fp16 = transpose(perm = embed_59_perm_0, x = var_4188_cast_fp16)[name = string("transpose_29")]; tensor var_4209_cast_fp16 = mul(x = embed_59_cast_fp16, y = cos_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor var_4210_split_sizes_0 = const()[name = string("op_4210_split_sizes_0"), val = tensor([32, 32])]; int32 var_4210_axis_0 = const()[name = string("op_4210_axis_0"), val = int32(-1)]; tensor var_4210_cast_fp16_0, tensor var_4210_cast_fp16_1 = split(axis = var_4210_axis_0, split_sizes = var_4210_split_sizes_0, x = embed_59_cast_fp16)[name = string("op_4210_cast_fp16")]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4212_cast_fp16 = mul(x = var_4210_cast_fp16_1, y = const_148_promoted_to_fp16)[name = string("op_4212_cast_fp16")]; int32 var_4214 = const()[name = string("op_4214"), val = int32(-1)]; bool var_4215_interleave_0 = const()[name = string("op_4215_interleave_0"), val = bool(false)]; tensor var_4215_cast_fp16 = concat(axis = var_4214, interleave = var_4215_interleave_0, values = (var_4212_cast_fp16, var_4210_cast_fp16_0))[name = string("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = mul(x = var_4215_cast_fp16, y = sin_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor key_states_59_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4216_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([14])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([0])]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([15])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_141, expand_dims_142, position_id, concat_115_values3_0))[name = string("concat_115")]; tensor concat_116_values1_0 = const()[name = string("concat_116_values1_0"), val = tensor([0])]; tensor concat_116_values3_0 = const()[name = string("concat_116_values3_0"), val = tensor([0])]; int32 concat_116_axis_0 = const()[name = string("concat_116_axis_0"), val = int32(0)]; bool concat_116_interleave_0 = const()[name = string("concat_116_interleave_0"), val = bool(false)]; tensor concat_116 = concat(axis = concat_116_axis_0, interleave = concat_116_interleave_0, values = (expand_dims_144, concat_116_values1_0, var_426, concat_116_values3_0))[name = string("concat_116")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = key_states_59_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_338_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_338")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59_cast_fp16 = transpose(perm = value_states_59_perm_0, x = var_4195_cast_fp16)[name = string("transpose_28")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = value_states_59_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_339_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_339")]; tensor var_4259_begin_0 = const()[name = string("op_4259_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4259_end_0 = const()[name = string("op_4259_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4259_end_mask_0 = const()[name = string("op_4259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4259_cast_fp16 = slice_by_index(begin = var_4259_begin_0, end = var_4259_end_0, end_mask = var_4259_end_mask_0, x = coreml_update_state_76)[name = string("op_4259_cast_fp16")]; tensor tile_28 = const()[name = string("tile_28"), val = tensor([1, 1])]; int32 var_4262_axis_0 = const()[name = string("op_4262_axis_0"), val = int32(1)]; tensor var_4262_cast_fp16_0, tensor var_4262_cast_fp16_1 = split(axis = var_4262_axis_0, split_sizes = tile_28, x = var_4259_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor var_4269_begin_0 = const()[name = string("op_4269_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = string("op_4269_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4269_end_mask_0 = const()[name = string("op_4269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = coreml_update_state_77)[name = string("op_4269_cast_fp16")]; tensor tile_29 = const()[name = string("tile_29"), val = tensor([1, 1])]; int32 var_4272_axis_0 = const()[name = string("op_4272_axis_0"), val = int32(1)]; tensor var_4272_cast_fp16_0, tensor var_4272_cast_fp16_1 = split(axis = var_4272_axis_0, split_sizes = tile_29, x = var_4269_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4275_split_sizes_0 = const()[name = string("op_4275_split_sizes_0"), val = tensor([8, 8])]; int32 var_4275_axis_0 = const()[name = string("op_4275_axis_0"), val = int32(1)]; tensor var_4275_cast_fp16_0, tensor var_4275_cast_fp16_1 = split(axis = var_4275_axis_0, split_sizes = var_4275_split_sizes_0, x = query_states_59_cast_fp16)[name = string("op_4275_cast_fp16")]; bool attn_weights_225_transpose_x_0 = const()[name = string("attn_weights_225_transpose_x_0"), val = bool(false)]; bool attn_weights_225_transpose_y_0 = const()[name = string("attn_weights_225_transpose_y_0"), val = bool(false)]; tensor attn_weights_225_cast_fp16 = matmul(transpose_x = attn_weights_225_transpose_x_0, transpose_y = attn_weights_225_transpose_y_0, x = var_4262_cast_fp16_0, y = var_4275_cast_fp16_0)[name = string("attn_weights_225_cast_fp16")]; fp16 _inversed_attn_weights_227_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_227_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_227_cast_fp16 = mul(x = attn_weights_225_cast_fp16, y = _inversed_attn_weights_227_y_0_to_fp16)[name = string("_inversed_attn_weights_227_cast_fp16")]; tensor attn_weights_229_cast_fp16 = add(x = _inversed_attn_weights_227_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_229_cast_fp16")]; int32 var_4282 = const()[name = string("op_4282"), val = int32(2)]; tensor attn_weights_231_cast_fp16 = softmax(axis = var_4282, x = attn_weights_229_cast_fp16)[name = string("attn_weights_231_cast_fp16")]; bool var_4288_transpose_x_1 = const()[name = string("op_4288_transpose_x_1"), val = bool(true)]; bool var_4288_transpose_y_1 = const()[name = string("op_4288_transpose_y_1"), val = bool(false)]; tensor var_4288_cast_fp16 = matmul(transpose_x = var_4288_transpose_x_1, transpose_y = var_4288_transpose_y_1, x = attn_weights_231_cast_fp16, y = var_4272_cast_fp16_0)[name = string("op_4288_cast_fp16")]; bool attn_weights_233_transpose_x_0 = const()[name = string("attn_weights_233_transpose_x_0"), val = bool(false)]; bool attn_weights_233_transpose_y_0 = const()[name = string("attn_weights_233_transpose_y_0"), val = bool(false)]; tensor attn_weights_233_cast_fp16 = matmul(transpose_x = attn_weights_233_transpose_x_0, transpose_y = attn_weights_233_transpose_y_0, x = var_4262_cast_fp16_1, y = var_4275_cast_fp16_1)[name = string("attn_weights_233_cast_fp16")]; fp16 _inversed_attn_weights_235_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_235_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_235_cast_fp16 = mul(x = attn_weights_233_cast_fp16, y = _inversed_attn_weights_235_y_0_to_fp16)[name = string("_inversed_attn_weights_235_cast_fp16")]; tensor attn_weights_237_cast_fp16 = add(x = _inversed_attn_weights_235_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_237_cast_fp16")]; int32 var_4294 = const()[name = string("op_4294"), val = int32(2)]; tensor attn_weights_239_cast_fp16 = softmax(axis = var_4294, x = attn_weights_237_cast_fp16)[name = string("attn_weights_239_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(true)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_239_cast_fp16, y = var_4272_cast_fp16_1)[name = string("attn_output_85_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(1)]; bool attn_output_87_interleave_0 = const()[name = string("attn_output_87_interleave_0"), val = bool(false)]; tensor attn_output_87_cast_fp16 = concat(axis = var_4302, interleave = attn_output_87_interleave_0, values = (var_4288_cast_fp16, attn_output_85_cast_fp16))[name = string("attn_output_87_cast_fp16")]; tensor var_4306_perm_0 = const()[name = string("op_4306_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4311 = const()[name = string("op_4311"), val = tensor([1, 1024, 1, 32])]; tensor var_4306_cast_fp16 = transpose(perm = var_4306_perm_0, x = attn_output_87_cast_fp16)[name = string("transpose_27")]; tensor x_257_cast_fp16 = reshape(shape = var_4311, x = var_4306_cast_fp16)[name = string("x_257_cast_fp16")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429531584)))]; tensor hidden_states_87_cast_fp16 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = var_4318_to_fp16, x = x_257_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_251_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(1)]; fp16 const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4333_cast_fp16 = mul(x = x_259_cast_fp16, y = const_153_promoted_to_fp16)[name = string("op_4333_cast_fp16")]; bool x_261_interleave_0 = const()[name = string("x_261_interleave_0"), val = bool(false)]; tensor x_261_cast_fp16 = concat(axis = var_4330, interleave = x_261_interleave_0, values = (x_259_cast_fp16, var_4333_cast_fp16))[name = string("x_261_cast_fp16")]; tensor out_175_axes_0 = const()[name = string("out_175_axes_0"), val = tensor([1])]; fp16 var_4343_to_fp16 = const()[name = string("op_4343_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_175_cast_fp16 = layer_norm(axes = out_175_axes_0, epsilon = var_4343_to_fp16, x = x_261_cast_fp16)[name = string("out_175_cast_fp16")]; tensor layer_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431628800)))]; tensor out_177_cast_fp16 = mul(x = out_175_cast_fp16, y = layer_layers_14_post_attention_layernorm_weight_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_4349_split_sizes_0 = const()[name = string("op_4349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4349_axis_0 = const()[name = string("op_4349_axis_0"), val = int32(1)]; tensor var_4349_cast_fp16_0, tensor var_4349_cast_fp16_1 = split(axis = var_4349_axis_0, split_sizes = var_4349_split_sizes_0, x = out_177_cast_fp16)[name = string("op_4349_cast_fp16")]; string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431632960)))]; tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = var_4354_to_fp16, x = var_4349_cast_fp16_0)[name = string("input_29_cast_fp16")]; tensor var_4365_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_4365_cast_fp16")]; string var_4370_pad_type_0 = const()[name = string("op_4370_pad_type_0"), val = string("valid")]; tensor var_4370_strides_0 = const()[name = string("op_4370_strides_0"), val = tensor([1, 1])]; tensor var_4370_pad_0 = const()[name = string("op_4370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4370_dilations_0 = const()[name = string("op_4370_dilations_0"), val = tensor([1, 1])]; int32 var_4370_groups_0 = const()[name = string("op_4370_groups_0"), val = int32(1)]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440021632)))]; tensor var_4370_cast_fp16 = conv(dilations = var_4370_dilations_0, groups = var_4370_groups_0, pad = var_4370_pad_0, pad_type = var_4370_pad_type_0, strides = var_4370_strides_0, weight = var_4353_to_fp16, x = var_4349_cast_fp16_0)[name = string("op_4370_cast_fp16")]; tensor x_267_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4370_cast_fp16)[name = string("x_267_cast_fp16")]; string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; tensor var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448410304)))]; tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = var_4352_to_fp16, x = x_267_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor x_269_cast_fp16 = add(x = x_259_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("x_269_cast_fp16")]; int32 var_4383 = const()[name = string("op_4383"), val = int32(1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool x_271_interleave_0 = const()[name = string("x_271_interleave_0"), val = bool(false)]; tensor x_271_cast_fp16 = concat(axis = var_4383, interleave = x_271_interleave_0, values = (x_269_cast_fp16, var_4386_cast_fp16))[name = string("x_271_cast_fp16")]; tensor out_181_axes_0 = const()[name = string("out_181_axes_0"), val = tensor([1])]; fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_181_cast_fp16 = layer_norm(axes = out_181_axes_0, epsilon = var_4396_to_fp16, x = x_271_cast_fp16)[name = string("out_181_cast_fp16")]; tensor layer_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456798976)))]; tensor out_183_cast_fp16 = mul(x = out_181_cast_fp16, y = layer_layers_15_input_layernorm_weight_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_4402_split_sizes_0 = const()[name = string("op_4402_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4402_axis_0 = const()[name = string("op_4402_axis_0"), val = int32(1)]; tensor var_4402_cast_fp16_0, tensor var_4402_cast_fp16_1 = split(axis = var_4402_axis_0, split_sizes = var_4402_split_sizes_0, x = out_183_cast_fp16)[name = string("op_4402_cast_fp16")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456803136)))]; tensor query_states_61_cast_fp16 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = var_4424_to_fp16, x = var_4402_cast_fp16_0)[name = string("query_states_61_cast_fp16")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458900352)))]; tensor key_states_61_cast_fp16 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = var_4435_to_fp16, x = var_4402_cast_fp16_0)[name = string("key_states_61_cast_fp16")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459162560)))]; tensor value_states_61_cast_fp16 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = var_4446_to_fp16, x = var_4402_cast_fp16_0)[name = string("value_states_61_cast_fp16")]; tensor var_4454 = const()[name = string("op_4454"), val = tensor([1, 16, 64, 32])]; tensor embed_61_cast_fp16 = reshape(shape = var_4454, x = query_states_61_cast_fp16)[name = string("embed_61_cast_fp16")]; tensor var_4458 = const()[name = string("op_4458"), val = tensor([1, 2, 64, 32])]; tensor var_4459_cast_fp16 = reshape(shape = var_4458, x = key_states_61_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor embed_63_perm_0 = const()[name = string("embed_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4465 = const()[name = string("op_4465"), val = tensor([1, 2, 64, 32])]; tensor var_4466_cast_fp16 = reshape(shape = var_4465, x = value_states_61_cast_fp16)[name = string("op_4466_cast_fp16")]; tensor value_states_63_perm_0 = const()[name = string("value_states_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4470_cast_fp16 = mul(x = embed_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4471_split_sizes_0 = const()[name = string("op_4471_split_sizes_0"), val = tensor([32, 32])]; int32 var_4471_axis_0 = const()[name = string("op_4471_axis_0"), val = int32(-2)]; tensor var_4471_cast_fp16_0, tensor var_4471_cast_fp16_1 = split(axis = var_4471_axis_0, split_sizes = var_4471_split_sizes_0, x = embed_61_cast_fp16)[name = string("op_4471_cast_fp16")]; fp16 const_157_promoted_to_fp16 = const()[name = string("const_157_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4473_cast_fp16 = mul(x = var_4471_cast_fp16_1, y = const_157_promoted_to_fp16)[name = string("op_4473_cast_fp16")]; int32 var_4475 = const()[name = string("op_4475"), val = int32(-2)]; bool var_4476_interleave_0 = const()[name = string("op_4476_interleave_0"), val = bool(false)]; tensor var_4476_cast_fp16 = concat(axis = var_4475, interleave = var_4476_interleave_0, values = (var_4473_cast_fp16, var_4471_cast_fp16_0))[name = string("op_4476_cast_fp16")]; tensor var_4477_cast_fp16 = mul(x = var_4476_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4477_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_4470_cast_fp16, y = var_4477_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor embed_63_cast_fp16 = transpose(perm = embed_63_perm_0, x = var_4459_cast_fp16)[name = string("transpose_26")]; tensor var_4480_cast_fp16 = mul(x = embed_63_cast_fp16, y = cos_cast_fp16)[name = string("op_4480_cast_fp16")]; tensor var_4481_split_sizes_0 = const()[name = string("op_4481_split_sizes_0"), val = tensor([32, 32])]; int32 var_4481_axis_0 = const()[name = string("op_4481_axis_0"), val = int32(-1)]; tensor var_4481_cast_fp16_0, tensor var_4481_cast_fp16_1 = split(axis = var_4481_axis_0, split_sizes = var_4481_split_sizes_0, x = embed_63_cast_fp16)[name = string("op_4481_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4483_cast_fp16 = mul(x = var_4481_cast_fp16_1, y = const_158_promoted_to_fp16)[name = string("op_4483_cast_fp16")]; int32 var_4485 = const()[name = string("op_4485"), val = int32(-1)]; bool var_4486_interleave_0 = const()[name = string("op_4486_interleave_0"), val = bool(false)]; tensor var_4486_cast_fp16 = concat(axis = var_4485, interleave = var_4486_interleave_0, values = (var_4483_cast_fp16, var_4481_cast_fp16_0))[name = string("op_4486_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = var_4486_cast_fp16, y = sin_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor key_states_63_cast_fp16 = add(x = var_4480_cast_fp16, y = var_4487_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([15])]; tensor expand_dims_152 = const()[name = string("expand_dims_152"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([16])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_151, expand_dims_152, position_id, concat_123_values3_0))[name = string("concat_123")]; tensor concat_124_values1_0 = const()[name = string("concat_124_values1_0"), val = tensor([0])]; tensor concat_124_values3_0 = const()[name = string("concat_124_values3_0"), val = tensor([0])]; int32 concat_124_axis_0 = const()[name = string("concat_124_axis_0"), val = int32(0)]; bool concat_124_interleave_0 = const()[name = string("concat_124_interleave_0"), val = bool(false)]; tensor concat_124 = concat(axis = concat_124_axis_0, interleave = concat_124_interleave_0, values = (expand_dims_154, concat_124_values1_0, var_426, concat_124_values3_0))[name = string("concat_124")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_340_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_340")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63_cast_fp16 = transpose(perm = value_states_63_perm_0, x = var_4466_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = value_states_63_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_341_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_341")]; tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_78)[name = string("op_4530_cast_fp16")]; tensor tile_30 = const()[name = string("tile_30"), val = tensor([1, 1])]; int32 var_4533_axis_0 = const()[name = string("op_4533_axis_0"), val = int32(1)]; tensor var_4533_cast_fp16_0, tensor var_4533_cast_fp16_1 = split(axis = var_4533_axis_0, split_sizes = tile_30, x = var_4530_cast_fp16)[name = string("op_4533_cast_fp16")]; tensor var_4540_begin_0 = const()[name = string("op_4540_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4540_end_0 = const()[name = string("op_4540_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4540_end_mask_0 = const()[name = string("op_4540_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4540_cast_fp16 = slice_by_index(begin = var_4540_begin_0, end = var_4540_end_0, end_mask = var_4540_end_mask_0, x = coreml_update_state_79)[name = string("op_4540_cast_fp16")]; tensor tile_31 = const()[name = string("tile_31"), val = tensor([1, 1])]; int32 var_4543_axis_0 = const()[name = string("op_4543_axis_0"), val = int32(1)]; tensor var_4543_cast_fp16_0, tensor var_4543_cast_fp16_1 = split(axis = var_4543_axis_0, split_sizes = tile_31, x = var_4540_cast_fp16)[name = string("op_4543_cast_fp16")]; tensor var_4546_split_sizes_0 = const()[name = string("op_4546_split_sizes_0"), val = tensor([8, 8])]; int32 var_4546_axis_0 = const()[name = string("op_4546_axis_0"), val = int32(1)]; tensor var_4546_cast_fp16_0, tensor var_4546_cast_fp16_1 = split(axis = var_4546_axis_0, split_sizes = var_4546_split_sizes_0, x = query_states_63_cast_fp16)[name = string("op_4546_cast_fp16")]; bool attn_weights_241_transpose_x_0 = const()[name = string("attn_weights_241_transpose_x_0"), val = bool(false)]; bool attn_weights_241_transpose_y_0 = const()[name = string("attn_weights_241_transpose_y_0"), val = bool(false)]; tensor attn_weights_241_cast_fp16 = matmul(transpose_x = attn_weights_241_transpose_x_0, transpose_y = attn_weights_241_transpose_y_0, x = var_4533_cast_fp16_0, y = var_4546_cast_fp16_0)[name = string("attn_weights_241_cast_fp16")]; fp16 _inversed_attn_weights_243_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_243_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_243_cast_fp16 = mul(x = attn_weights_241_cast_fp16, y = _inversed_attn_weights_243_y_0_to_fp16)[name = string("_inversed_attn_weights_243_cast_fp16")]; tensor attn_weights_245_cast_fp16 = add(x = _inversed_attn_weights_243_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_245_cast_fp16")]; int32 var_4553 = const()[name = string("op_4553"), val = int32(2)]; tensor attn_weights_247_cast_fp16 = softmax(axis = var_4553, x = attn_weights_245_cast_fp16)[name = string("attn_weights_247_cast_fp16")]; bool var_4559_transpose_x_1 = const()[name = string("op_4559_transpose_x_1"), val = bool(true)]; bool var_4559_transpose_y_1 = const()[name = string("op_4559_transpose_y_1"), val = bool(false)]; tensor var_4559_cast_fp16 = matmul(transpose_x = var_4559_transpose_x_1, transpose_y = var_4559_transpose_y_1, x = attn_weights_247_cast_fp16, y = var_4543_cast_fp16_0)[name = string("op_4559_cast_fp16")]; bool attn_weights_249_transpose_x_0 = const()[name = string("attn_weights_249_transpose_x_0"), val = bool(false)]; bool attn_weights_249_transpose_y_0 = const()[name = string("attn_weights_249_transpose_y_0"), val = bool(false)]; tensor attn_weights_249_cast_fp16 = matmul(transpose_x = attn_weights_249_transpose_x_0, transpose_y = attn_weights_249_transpose_y_0, x = var_4533_cast_fp16_1, y = var_4546_cast_fp16_1)[name = string("attn_weights_249_cast_fp16")]; fp16 _inversed_attn_weights_251_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_251_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_251_cast_fp16 = mul(x = attn_weights_249_cast_fp16, y = _inversed_attn_weights_251_y_0_to_fp16)[name = string("_inversed_attn_weights_251_cast_fp16")]; tensor attn_weights_253_cast_fp16 = add(x = _inversed_attn_weights_251_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_253_cast_fp16")]; int32 var_4565 = const()[name = string("op_4565"), val = int32(2)]; tensor attn_weights_255_cast_fp16 = softmax(axis = var_4565, x = attn_weights_253_cast_fp16)[name = string("attn_weights_255_cast_fp16")]; bool attn_output_91_transpose_x_1 = const()[name = string("attn_output_91_transpose_x_1"), val = bool(true)]; bool attn_output_91_transpose_y_1 = const()[name = string("attn_output_91_transpose_y_1"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_1, transpose_y = attn_output_91_transpose_y_1, x = attn_weights_255_cast_fp16, y = var_4543_cast_fp16_1)[name = string("attn_output_91_cast_fp16")]; int32 var_4573 = const()[name = string("op_4573"), val = int32(1)]; bool attn_output_93_interleave_0 = const()[name = string("attn_output_93_interleave_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = concat(axis = var_4573, interleave = attn_output_93_interleave_0, values = (var_4559_cast_fp16, attn_output_91_cast_fp16))[name = string("attn_output_93_cast_fp16")]; tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 1024, 1, 32])]; tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; tensor x_275_cast_fp16 = reshape(shape = var_4582, x = var_4577_cast_fp16)[name = string("x_275_cast_fp16")]; string hidden_states_93_pad_type_0 = const()[name = string("hidden_states_93_pad_type_0"), val = string("valid")]; tensor hidden_states_93_strides_0 = const()[name = string("hidden_states_93_strides_0"), val = tensor([1, 1])]; tensor hidden_states_93_pad_0 = const()[name = string("hidden_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_93_dilations_0 = const()[name = string("hidden_states_93_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_93_groups_0 = const()[name = string("hidden_states_93_groups_0"), val = int32(1)]; tensor var_4589_to_fp16 = const()[name = string("op_4589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459424768)))]; tensor hidden_states_93_cast_fp16 = conv(dilations = hidden_states_93_dilations_0, groups = hidden_states_93_groups_0, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = hidden_states_93_strides_0, weight = var_4589_to_fp16, x = x_275_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_269_cast_fp16, y = hidden_states_93_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_4601 = const()[name = string("op_4601"), val = int32(1)]; fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4604_cast_fp16 = mul(x = x_277_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_4604_cast_fp16")]; bool x_279_interleave_0 = const()[name = string("x_279_interleave_0"), val = bool(false)]; tensor x_279_cast_fp16 = concat(axis = var_4601, interleave = x_279_interleave_0, values = (x_277_cast_fp16, var_4604_cast_fp16))[name = string("x_279_cast_fp16")]; tensor out_187_axes_0 = const()[name = string("out_187_axes_0"), val = tensor([1])]; fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_187_cast_fp16 = layer_norm(axes = out_187_axes_0, epsilon = var_4614_to_fp16, x = x_279_cast_fp16)[name = string("out_187_cast_fp16")]; tensor layer_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461521984)))]; tensor out_189_cast_fp16 = mul(x = out_187_cast_fp16, y = layer_layers_15_post_attention_layernorm_weight_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_4620_split_sizes_0 = const()[name = string("op_4620_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4620_axis_0 = const()[name = string("op_4620_axis_0"), val = int32(1)]; tensor var_4620_cast_fp16_0, tensor var_4620_cast_fp16_1 = split(axis = var_4620_axis_0, split_sizes = var_4620_split_sizes_0, x = out_189_cast_fp16)[name = string("op_4620_cast_fp16")]; string input_31_pad_type_0 = const()[name = string("input_31_pad_type_0"), val = string("valid")]; tensor input_31_strides_0 = const()[name = string("input_31_strides_0"), val = tensor([1, 1])]; tensor input_31_pad_0 = const()[name = string("input_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_31_dilations_0 = const()[name = string("input_31_dilations_0"), val = tensor([1, 1])]; int32 input_31_groups_0 = const()[name = string("input_31_groups_0"), val = int32(1)]; tensor var_4625_to_fp16 = const()[name = string("op_4625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461526144)))]; tensor input_31_cast_fp16 = conv(dilations = input_31_dilations_0, groups = input_31_groups_0, pad = input_31_pad_0, pad_type = input_31_pad_type_0, strides = input_31_strides_0, weight = var_4625_to_fp16, x = var_4620_cast_fp16_0)[name = string("input_31_cast_fp16")]; tensor var_4636_cast_fp16 = silu(x = input_31_cast_fp16)[name = string("op_4636_cast_fp16")]; string var_4641_pad_type_0 = const()[name = string("op_4641_pad_type_0"), val = string("valid")]; tensor var_4641_strides_0 = const()[name = string("op_4641_strides_0"), val = tensor([1, 1])]; tensor var_4641_pad_0 = const()[name = string("op_4641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_dilations_0 = const()[name = string("op_4641_dilations_0"), val = tensor([1, 1])]; int32 var_4641_groups_0 = const()[name = string("op_4641_groups_0"), val = int32(1)]; tensor var_4624_to_fp16 = const()[name = string("op_4624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469914816)))]; tensor var_4641_cast_fp16 = conv(dilations = var_4641_dilations_0, groups = var_4641_groups_0, pad = var_4641_pad_0, pad_type = var_4641_pad_type_0, strides = var_4641_strides_0, weight = var_4624_to_fp16, x = var_4620_cast_fp16_0)[name = string("op_4641_cast_fp16")]; tensor x_285_cast_fp16 = mul(x = var_4636_cast_fp16, y = var_4641_cast_fp16)[name = string("x_285_cast_fp16")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478303488)))]; tensor hidden_states_95_cast_fp16 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = var_4623_to_fp16, x = x_285_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor x_287_cast_fp16 = add(x = x_277_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("x_287_cast_fp16")]; int32 var_4654 = const()[name = string("op_4654"), val = int32(1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4657_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4657_cast_fp16")]; bool x_289_interleave_0 = const()[name = string("x_289_interleave_0"), val = bool(false)]; tensor x_289_cast_fp16 = concat(axis = var_4654, interleave = x_289_interleave_0, values = (x_287_cast_fp16, var_4657_cast_fp16))[name = string("x_289_cast_fp16")]; tensor out_193_axes_0 = const()[name = string("out_193_axes_0"), val = tensor([1])]; fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_193_cast_fp16 = layer_norm(axes = out_193_axes_0, epsilon = var_4667_to_fp16, x = x_289_cast_fp16)[name = string("out_193_cast_fp16")]; tensor layer_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486692160)))]; tensor out_195_cast_fp16 = mul(x = out_193_cast_fp16, y = layer_layers_16_input_layernorm_weight_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_4673_split_sizes_0 = const()[name = string("op_4673_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4673_axis_0 = const()[name = string("op_4673_axis_0"), val = int32(1)]; tensor var_4673_cast_fp16_0, tensor var_4673_cast_fp16_1 = split(axis = var_4673_axis_0, split_sizes = var_4673_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4673_cast_fp16")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486696320)))]; tensor query_states_65_cast_fp16 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = var_4695_to_fp16, x = var_4673_cast_fp16_0)[name = string("query_states_65_cast_fp16")]; string key_states_65_pad_type_0 = const()[name = string("key_states_65_pad_type_0"), val = string("valid")]; tensor key_states_65_strides_0 = const()[name = string("key_states_65_strides_0"), val = tensor([1, 1])]; tensor key_states_65_pad_0 = const()[name = string("key_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_65_dilations_0 = const()[name = string("key_states_65_dilations_0"), val = tensor([1, 1])]; int32 key_states_65_groups_0 = const()[name = string("key_states_65_groups_0"), val = int32(1)]; tensor var_4706_to_fp16 = const()[name = string("op_4706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488793536)))]; tensor key_states_65_cast_fp16 = conv(dilations = key_states_65_dilations_0, groups = key_states_65_groups_0, pad = key_states_65_pad_0, pad_type = key_states_65_pad_type_0, strides = key_states_65_strides_0, weight = var_4706_to_fp16, x = var_4673_cast_fp16_0)[name = string("key_states_65_cast_fp16")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489055744)))]; tensor value_states_65_cast_fp16 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = var_4717_to_fp16, x = var_4673_cast_fp16_0)[name = string("value_states_65_cast_fp16")]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 16, 64, 32])]; tensor embed_65_cast_fp16 = reshape(shape = var_4725, x = query_states_65_cast_fp16)[name = string("embed_65_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 2, 64, 32])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = key_states_65_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor embed_67_perm_0 = const()[name = string("embed_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2, 64, 32])]; tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = value_states_65_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor value_states_67_perm_0 = const()[name = string("value_states_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4741_cast_fp16 = mul(x = embed_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4741_cast_fp16")]; tensor var_4742_split_sizes_0 = const()[name = string("op_4742_split_sizes_0"), val = tensor([32, 32])]; int32 var_4742_axis_0 = const()[name = string("op_4742_axis_0"), val = int32(-2)]; tensor var_4742_cast_fp16_0, tensor var_4742_cast_fp16_1 = split(axis = var_4742_axis_0, split_sizes = var_4742_split_sizes_0, x = embed_65_cast_fp16)[name = string("op_4742_cast_fp16")]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4744_cast_fp16 = mul(x = var_4742_cast_fp16_1, y = const_167_promoted_to_fp16)[name = string("op_4744_cast_fp16")]; int32 var_4746 = const()[name = string("op_4746"), val = int32(-2)]; bool var_4747_interleave_0 = const()[name = string("op_4747_interleave_0"), val = bool(false)]; tensor var_4747_cast_fp16 = concat(axis = var_4746, interleave = var_4747_interleave_0, values = (var_4744_cast_fp16, var_4742_cast_fp16_0))[name = string("op_4747_cast_fp16")]; tensor var_4748_cast_fp16 = mul(x = var_4747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_4741_cast_fp16, y = var_4748_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor embed_67_cast_fp16 = transpose(perm = embed_67_perm_0, x = var_4730_cast_fp16)[name = string("transpose_23")]; tensor var_4751_cast_fp16 = mul(x = embed_67_cast_fp16, y = cos_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4752_split_sizes_0 = const()[name = string("op_4752_split_sizes_0"), val = tensor([32, 32])]; int32 var_4752_axis_0 = const()[name = string("op_4752_axis_0"), val = int32(-1)]; tensor var_4752_cast_fp16_0, tensor var_4752_cast_fp16_1 = split(axis = var_4752_axis_0, split_sizes = var_4752_split_sizes_0, x = embed_67_cast_fp16)[name = string("op_4752_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4754_cast_fp16 = mul(x = var_4752_cast_fp16_1, y = const_168_promoted_to_fp16)[name = string("op_4754_cast_fp16")]; int32 var_4756 = const()[name = string("op_4756"), val = int32(-1)]; bool var_4757_interleave_0 = const()[name = string("op_4757_interleave_0"), val = bool(false)]; tensor var_4757_cast_fp16 = concat(axis = var_4756, interleave = var_4757_interleave_0, values = (var_4754_cast_fp16, var_4752_cast_fp16_0))[name = string("op_4757_cast_fp16")]; tensor var_4758_cast_fp16 = mul(x = var_4757_cast_fp16, y = sin_cast_fp16)[name = string("op_4758_cast_fp16")]; tensor key_states_67_cast_fp16 = add(x = var_4751_cast_fp16, y = var_4758_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor expand_dims_161 = const()[name = string("expand_dims_161"), val = tensor([16])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_164 = const()[name = string("expand_dims_164"), val = tensor([17])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_161, expand_dims_162, position_id, concat_131_values3_0))[name = string("concat_131")]; tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_164, concat_132_values1_0, var_426, concat_132_values3_0))[name = string("concat_132")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = key_states_67_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_342_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_342")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67_cast_fp16 = transpose(perm = value_states_67_perm_0, x = var_4737_cast_fp16)[name = string("transpose_22")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = value_states_67_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_343_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_343")]; tensor var_4801_begin_0 = const()[name = string("op_4801_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4801_end_0 = const()[name = string("op_4801_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4801_end_mask_0 = const()[name = string("op_4801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4801_cast_fp16 = slice_by_index(begin = var_4801_begin_0, end = var_4801_end_0, end_mask = var_4801_end_mask_0, x = coreml_update_state_80)[name = string("op_4801_cast_fp16")]; tensor tile_32 = const()[name = string("tile_32"), val = tensor([1, 1])]; int32 var_4804_axis_0 = const()[name = string("op_4804_axis_0"), val = int32(1)]; tensor var_4804_cast_fp16_0, tensor var_4804_cast_fp16_1 = split(axis = var_4804_axis_0, split_sizes = tile_32, x = var_4801_cast_fp16)[name = string("op_4804_cast_fp16")]; tensor var_4811_begin_0 = const()[name = string("op_4811_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4811_end_0 = const()[name = string("op_4811_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4811_end_mask_0 = const()[name = string("op_4811_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4811_cast_fp16 = slice_by_index(begin = var_4811_begin_0, end = var_4811_end_0, end_mask = var_4811_end_mask_0, x = coreml_update_state_81)[name = string("op_4811_cast_fp16")]; tensor tile_33 = const()[name = string("tile_33"), val = tensor([1, 1])]; int32 var_4814_axis_0 = const()[name = string("op_4814_axis_0"), val = int32(1)]; tensor var_4814_cast_fp16_0, tensor var_4814_cast_fp16_1 = split(axis = var_4814_axis_0, split_sizes = tile_33, x = var_4811_cast_fp16)[name = string("op_4814_cast_fp16")]; tensor var_4817_split_sizes_0 = const()[name = string("op_4817_split_sizes_0"), val = tensor([8, 8])]; int32 var_4817_axis_0 = const()[name = string("op_4817_axis_0"), val = int32(1)]; tensor var_4817_cast_fp16_0, tensor var_4817_cast_fp16_1 = split(axis = var_4817_axis_0, split_sizes = var_4817_split_sizes_0, x = query_states_67_cast_fp16)[name = string("op_4817_cast_fp16")]; bool attn_weights_257_transpose_x_0 = const()[name = string("attn_weights_257_transpose_x_0"), val = bool(false)]; bool attn_weights_257_transpose_y_0 = const()[name = string("attn_weights_257_transpose_y_0"), val = bool(false)]; tensor attn_weights_257_cast_fp16 = matmul(transpose_x = attn_weights_257_transpose_x_0, transpose_y = attn_weights_257_transpose_y_0, x = var_4804_cast_fp16_0, y = var_4817_cast_fp16_0)[name = string("attn_weights_257_cast_fp16")]; fp16 _inversed_attn_weights_259_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_259_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_259_cast_fp16 = mul(x = attn_weights_257_cast_fp16, y = _inversed_attn_weights_259_y_0_to_fp16)[name = string("_inversed_attn_weights_259_cast_fp16")]; tensor attn_weights_261_cast_fp16 = add(x = _inversed_attn_weights_259_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_261_cast_fp16")]; int32 var_4824 = const()[name = string("op_4824"), val = int32(2)]; tensor attn_weights_263_cast_fp16 = softmax(axis = var_4824, x = attn_weights_261_cast_fp16)[name = string("attn_weights_263_cast_fp16")]; bool var_4830_transpose_x_1 = const()[name = string("op_4830_transpose_x_1"), val = bool(true)]; bool var_4830_transpose_y_1 = const()[name = string("op_4830_transpose_y_1"), val = bool(false)]; tensor var_4830_cast_fp16 = matmul(transpose_x = var_4830_transpose_x_1, transpose_y = var_4830_transpose_y_1, x = attn_weights_263_cast_fp16, y = var_4814_cast_fp16_0)[name = string("op_4830_cast_fp16")]; bool attn_weights_265_transpose_x_0 = const()[name = string("attn_weights_265_transpose_x_0"), val = bool(false)]; bool attn_weights_265_transpose_y_0 = const()[name = string("attn_weights_265_transpose_y_0"), val = bool(false)]; tensor attn_weights_265_cast_fp16 = matmul(transpose_x = attn_weights_265_transpose_x_0, transpose_y = attn_weights_265_transpose_y_0, x = var_4804_cast_fp16_1, y = var_4817_cast_fp16_1)[name = string("attn_weights_265_cast_fp16")]; fp16 _inversed_attn_weights_267_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_267_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_267_cast_fp16 = mul(x = attn_weights_265_cast_fp16, y = _inversed_attn_weights_267_y_0_to_fp16)[name = string("_inversed_attn_weights_267_cast_fp16")]; tensor attn_weights_269_cast_fp16 = add(x = _inversed_attn_weights_267_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_269_cast_fp16")]; int32 var_4836 = const()[name = string("op_4836"), val = int32(2)]; tensor attn_weights_271_cast_fp16 = softmax(axis = var_4836, x = attn_weights_269_cast_fp16)[name = string("attn_weights_271_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(true)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_271_cast_fp16, y = var_4814_cast_fp16_1)[name = string("attn_output_97_cast_fp16")]; int32 var_4844 = const()[name = string("op_4844"), val = int32(1)]; bool attn_output_99_interleave_0 = const()[name = string("attn_output_99_interleave_0"), val = bool(false)]; tensor attn_output_99_cast_fp16 = concat(axis = var_4844, interleave = attn_output_99_interleave_0, values = (var_4830_cast_fp16, attn_output_97_cast_fp16))[name = string("attn_output_99_cast_fp16")]; tensor var_4848_perm_0 = const()[name = string("op_4848_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 1024, 1, 32])]; tensor var_4848_cast_fp16 = transpose(perm = var_4848_perm_0, x = attn_output_99_cast_fp16)[name = string("transpose_21")]; tensor x_293_cast_fp16 = reshape(shape = var_4853, x = var_4848_cast_fp16)[name = string("x_293_cast_fp16")]; string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489317952)))]; tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = var_4860_to_fp16, x = x_293_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor x_295_cast_fp16 = add(x = x_287_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("x_295_cast_fp16")]; int32 var_4872 = const()[name = string("op_4872"), val = int32(1)]; fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4875_cast_fp16 = mul(x = x_295_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; bool x_297_interleave_0 = const()[name = string("x_297_interleave_0"), val = bool(false)]; tensor x_297_cast_fp16 = concat(axis = var_4872, interleave = x_297_interleave_0, values = (x_295_cast_fp16, var_4875_cast_fp16))[name = string("x_297_cast_fp16")]; tensor out_199_axes_0 = const()[name = string("out_199_axes_0"), val = tensor([1])]; fp16 var_4885_to_fp16 = const()[name = string("op_4885_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_199_cast_fp16 = layer_norm(axes = out_199_axes_0, epsilon = var_4885_to_fp16, x = x_297_cast_fp16)[name = string("out_199_cast_fp16")]; tensor layer_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491415168)))]; tensor out_201_cast_fp16 = mul(x = out_199_cast_fp16, y = layer_layers_16_post_attention_layernorm_weight_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4891_split_sizes_0 = const()[name = string("op_4891_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(1)]; tensor var_4891_cast_fp16_0, tensor var_4891_cast_fp16_1 = split(axis = var_4891_axis_0, split_sizes = var_4891_split_sizes_0, x = out_201_cast_fp16)[name = string("op_4891_cast_fp16")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491419328)))]; tensor input_33_cast_fp16 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = var_4896_to_fp16, x = var_4891_cast_fp16_0)[name = string("input_33_cast_fp16")]; tensor var_4907_cast_fp16 = silu(x = input_33_cast_fp16)[name = string("op_4907_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4895_to_fp16 = const()[name = string("op_4895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499808000)))]; tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = var_4895_to_fp16, x = var_4891_cast_fp16_0)[name = string("op_4912_cast_fp16")]; tensor x_303_cast_fp16 = mul(x = var_4907_cast_fp16, y = var_4912_cast_fp16)[name = string("x_303_cast_fp16")]; string hidden_states_101_pad_type_0 = const()[name = string("hidden_states_101_pad_type_0"), val = string("valid")]; tensor hidden_states_101_strides_0 = const()[name = string("hidden_states_101_strides_0"), val = tensor([1, 1])]; tensor hidden_states_101_pad_0 = const()[name = string("hidden_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_101_dilations_0 = const()[name = string("hidden_states_101_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_101_groups_0 = const()[name = string("hidden_states_101_groups_0"), val = int32(1)]; tensor var_4894_to_fp16 = const()[name = string("op_4894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508196672)))]; tensor hidden_states_101_cast_fp16 = conv(dilations = hidden_states_101_dilations_0, groups = hidden_states_101_groups_0, pad = hidden_states_101_pad_0, pad_type = hidden_states_101_pad_type_0, strides = hidden_states_101_strides_0, weight = var_4894_to_fp16, x = x_303_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_295_cast_fp16, y = hidden_states_101_cast_fp16)[name = string("x_305_cast_fp16")]; int32 var_4925 = const()[name = string("op_4925"), val = int32(1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4928_cast_fp16 = mul(x = x_305_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_4928_cast_fp16")]; bool x_307_interleave_0 = const()[name = string("x_307_interleave_0"), val = bool(false)]; tensor x_307_cast_fp16 = concat(axis = var_4925, interleave = x_307_interleave_0, values = (x_305_cast_fp16, var_4928_cast_fp16))[name = string("x_307_cast_fp16")]; tensor out_205_axes_0 = const()[name = string("out_205_axes_0"), val = tensor([1])]; fp16 var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_205_cast_fp16 = layer_norm(axes = out_205_axes_0, epsilon = var_4938_to_fp16, x = x_307_cast_fp16)[name = string("out_205_cast_fp16")]; tensor layer_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516585344)))]; tensor out_207_cast_fp16 = mul(x = out_205_cast_fp16, y = layer_layers_17_input_layernorm_weight_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_4944_split_sizes_0 = const()[name = string("op_4944_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4944_axis_0 = const()[name = string("op_4944_axis_0"), val = int32(1)]; tensor var_4944_cast_fp16_0, tensor var_4944_cast_fp16_1 = split(axis = var_4944_axis_0, split_sizes = var_4944_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4944_cast_fp16")]; string query_states_69_pad_type_0 = const()[name = string("query_states_69_pad_type_0"), val = string("valid")]; tensor query_states_69_strides_0 = const()[name = string("query_states_69_strides_0"), val = tensor([1, 1])]; tensor query_states_69_pad_0 = const()[name = string("query_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_69_dilations_0 = const()[name = string("query_states_69_dilations_0"), val = tensor([1, 1])]; int32 query_states_69_groups_0 = const()[name = string("query_states_69_groups_0"), val = int32(1)]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516589504)))]; tensor query_states_69_cast_fp16 = conv(dilations = query_states_69_dilations_0, groups = query_states_69_groups_0, pad = query_states_69_pad_0, pad_type = query_states_69_pad_type_0, strides = query_states_69_strides_0, weight = var_4966_to_fp16, x = var_4944_cast_fp16_0)[name = string("query_states_69_cast_fp16")]; string key_states_69_pad_type_0 = const()[name = string("key_states_69_pad_type_0"), val = string("valid")]; tensor key_states_69_strides_0 = const()[name = string("key_states_69_strides_0"), val = tensor([1, 1])]; tensor key_states_69_pad_0 = const()[name = string("key_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_69_dilations_0 = const()[name = string("key_states_69_dilations_0"), val = tensor([1, 1])]; int32 key_states_69_groups_0 = const()[name = string("key_states_69_groups_0"), val = int32(1)]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518686720)))]; tensor key_states_69_cast_fp16 = conv(dilations = key_states_69_dilations_0, groups = key_states_69_groups_0, pad = key_states_69_pad_0, pad_type = key_states_69_pad_type_0, strides = key_states_69_strides_0, weight = var_4977_to_fp16, x = var_4944_cast_fp16_0)[name = string("key_states_69_cast_fp16")]; string value_states_69_pad_type_0 = const()[name = string("value_states_69_pad_type_0"), val = string("valid")]; tensor value_states_69_strides_0 = const()[name = string("value_states_69_strides_0"), val = tensor([1, 1])]; tensor value_states_69_pad_0 = const()[name = string("value_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_69_dilations_0 = const()[name = string("value_states_69_dilations_0"), val = tensor([1, 1])]; int32 value_states_69_groups_0 = const()[name = string("value_states_69_groups_0"), val = int32(1)]; tensor var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518948928)))]; tensor value_states_69_cast_fp16 = conv(dilations = value_states_69_dilations_0, groups = value_states_69_groups_0, pad = value_states_69_pad_0, pad_type = value_states_69_pad_type_0, strides = value_states_69_strides_0, weight = var_4988_to_fp16, x = var_4944_cast_fp16_0)[name = string("value_states_69_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 16, 64, 32])]; tensor embed_69_cast_fp16 = reshape(shape = var_4996, x = query_states_69_cast_fp16)[name = string("embed_69_cast_fp16")]; tensor var_5000 = const()[name = string("op_5000"), val = tensor([1, 2, 64, 32])]; tensor var_5001_cast_fp16 = reshape(shape = var_5000, x = key_states_69_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor embed_71_perm_0 = const()[name = string("embed_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 2, 64, 32])]; tensor var_5008_cast_fp16 = reshape(shape = var_5007, x = value_states_69_cast_fp16)[name = string("op_5008_cast_fp16")]; tensor value_states_71_perm_0 = const()[name = string("value_states_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5012_cast_fp16 = mul(x = embed_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor var_5013_split_sizes_0 = const()[name = string("op_5013_split_sizes_0"), val = tensor([32, 32])]; int32 var_5013_axis_0 = const()[name = string("op_5013_axis_0"), val = int32(-2)]; tensor var_5013_cast_fp16_0, tensor var_5013_cast_fp16_1 = split(axis = var_5013_axis_0, split_sizes = var_5013_split_sizes_0, x = embed_69_cast_fp16)[name = string("op_5013_cast_fp16")]; fp16 const_177_promoted_to_fp16 = const()[name = string("const_177_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5015_cast_fp16 = mul(x = var_5013_cast_fp16_1, y = const_177_promoted_to_fp16)[name = string("op_5015_cast_fp16")]; int32 var_5017 = const()[name = string("op_5017"), val = int32(-2)]; bool var_5018_interleave_0 = const()[name = string("op_5018_interleave_0"), val = bool(false)]; tensor var_5018_cast_fp16 = concat(axis = var_5017, interleave = var_5018_interleave_0, values = (var_5015_cast_fp16, var_5013_cast_fp16_0))[name = string("op_5018_cast_fp16")]; tensor var_5019_cast_fp16 = mul(x = var_5018_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_5012_cast_fp16, y = var_5019_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor embed_71_cast_fp16 = transpose(perm = embed_71_perm_0, x = var_5001_cast_fp16)[name = string("transpose_20")]; tensor var_5022_cast_fp16 = mul(x = embed_71_cast_fp16, y = cos_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([32, 32])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_cast_fp16_0, tensor var_5023_cast_fp16_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = embed_71_cast_fp16)[name = string("op_5023_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5025_cast_fp16 = mul(x = var_5023_cast_fp16_1, y = const_178_promoted_to_fp16)[name = string("op_5025_cast_fp16")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028_cast_fp16 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025_cast_fp16, var_5023_cast_fp16_0))[name = string("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = mul(x = var_5028_cast_fp16, y = sin_cast_fp16)[name = string("op_5029_cast_fp16")]; tensor key_states_71_cast_fp16 = add(x = var_5022_cast_fp16, y = var_5029_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([17])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([0])]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([18])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_171, expand_dims_172, position_id, concat_139_values3_0))[name = string("concat_139")]; tensor concat_140_values1_0 = const()[name = string("concat_140_values1_0"), val = tensor([0])]; tensor concat_140_values3_0 = const()[name = string("concat_140_values3_0"), val = tensor([0])]; int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (expand_dims_174, concat_140_values1_0, var_426, concat_140_values3_0))[name = string("concat_140")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = key_states_71_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_344_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_344")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_71_cast_fp16 = transpose(perm = value_states_71_perm_0, x = var_5008_cast_fp16)[name = string("transpose_19")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = value_states_71_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_345_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_345")]; tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_82)[name = string("op_5072_cast_fp16")]; tensor tile_34 = const()[name = string("tile_34"), val = tensor([1, 1])]; int32 var_5075_axis_0 = const()[name = string("op_5075_axis_0"), val = int32(1)]; tensor var_5075_cast_fp16_0, tensor var_5075_cast_fp16_1 = split(axis = var_5075_axis_0, split_sizes = tile_34, x = var_5072_cast_fp16)[name = string("op_5075_cast_fp16")]; tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = coreml_update_state_83)[name = string("op_5082_cast_fp16")]; tensor tile_35 = const()[name = string("tile_35"), val = tensor([1, 1])]; int32 var_5085_axis_0 = const()[name = string("op_5085_axis_0"), val = int32(1)]; tensor var_5085_cast_fp16_0, tensor var_5085_cast_fp16_1 = split(axis = var_5085_axis_0, split_sizes = tile_35, x = var_5082_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor var_5088_split_sizes_0 = const()[name = string("op_5088_split_sizes_0"), val = tensor([8, 8])]; int32 var_5088_axis_0 = const()[name = string("op_5088_axis_0"), val = int32(1)]; tensor var_5088_cast_fp16_0, tensor var_5088_cast_fp16_1 = split(axis = var_5088_axis_0, split_sizes = var_5088_split_sizes_0, x = query_states_71_cast_fp16)[name = string("op_5088_cast_fp16")]; bool attn_weights_273_transpose_x_0 = const()[name = string("attn_weights_273_transpose_x_0"), val = bool(false)]; bool attn_weights_273_transpose_y_0 = const()[name = string("attn_weights_273_transpose_y_0"), val = bool(false)]; tensor attn_weights_273_cast_fp16 = matmul(transpose_x = attn_weights_273_transpose_x_0, transpose_y = attn_weights_273_transpose_y_0, x = var_5075_cast_fp16_0, y = var_5088_cast_fp16_0)[name = string("attn_weights_273_cast_fp16")]; fp16 _inversed_attn_weights_275_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_275_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_275_cast_fp16 = mul(x = attn_weights_273_cast_fp16, y = _inversed_attn_weights_275_y_0_to_fp16)[name = string("_inversed_attn_weights_275_cast_fp16")]; tensor attn_weights_277_cast_fp16 = add(x = _inversed_attn_weights_275_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_277_cast_fp16")]; int32 var_5095 = const()[name = string("op_5095"), val = int32(2)]; tensor attn_weights_279_cast_fp16 = softmax(axis = var_5095, x = attn_weights_277_cast_fp16)[name = string("attn_weights_279_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(true)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(false)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = attn_weights_279_cast_fp16, y = var_5085_cast_fp16_0)[name = string("op_5101_cast_fp16")]; bool attn_weights_281_transpose_x_0 = const()[name = string("attn_weights_281_transpose_x_0"), val = bool(false)]; bool attn_weights_281_transpose_y_0 = const()[name = string("attn_weights_281_transpose_y_0"), val = bool(false)]; tensor attn_weights_281_cast_fp16 = matmul(transpose_x = attn_weights_281_transpose_x_0, transpose_y = attn_weights_281_transpose_y_0, x = var_5075_cast_fp16_1, y = var_5088_cast_fp16_1)[name = string("attn_weights_281_cast_fp16")]; fp16 _inversed_attn_weights_283_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_283_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_283_cast_fp16 = mul(x = attn_weights_281_cast_fp16, y = _inversed_attn_weights_283_y_0_to_fp16)[name = string("_inversed_attn_weights_283_cast_fp16")]; tensor attn_weights_285_cast_fp16 = add(x = _inversed_attn_weights_283_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_285_cast_fp16")]; int32 var_5107 = const()[name = string("op_5107"), val = int32(2)]; tensor attn_weights_287_cast_fp16 = softmax(axis = var_5107, x = attn_weights_285_cast_fp16)[name = string("attn_weights_287_cast_fp16")]; bool attn_output_103_transpose_x_1 = const()[name = string("attn_output_103_transpose_x_1"), val = bool(true)]; bool attn_output_103_transpose_y_1 = const()[name = string("attn_output_103_transpose_y_1"), val = bool(false)]; tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_1, transpose_y = attn_output_103_transpose_y_1, x = attn_weights_287_cast_fp16, y = var_5085_cast_fp16_1)[name = string("attn_output_103_cast_fp16")]; int32 var_5115 = const()[name = string("op_5115"), val = int32(1)]; bool attn_output_105_interleave_0 = const()[name = string("attn_output_105_interleave_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = concat(axis = var_5115, interleave = attn_output_105_interleave_0, values = (var_5101_cast_fp16, attn_output_103_cast_fp16))[name = string("attn_output_105_cast_fp16")]; tensor var_5119_perm_0 = const()[name = string("op_5119_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 1024, 1, 32])]; tensor var_5119_cast_fp16 = transpose(perm = var_5119_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_18")]; tensor x_311_cast_fp16 = reshape(shape = var_5124, x = var_5119_cast_fp16)[name = string("x_311_cast_fp16")]; string hidden_states_105_pad_type_0 = const()[name = string("hidden_states_105_pad_type_0"), val = string("valid")]; tensor hidden_states_105_strides_0 = const()[name = string("hidden_states_105_strides_0"), val = tensor([1, 1])]; tensor hidden_states_105_pad_0 = const()[name = string("hidden_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_105_dilations_0 = const()[name = string("hidden_states_105_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_105_groups_0 = const()[name = string("hidden_states_105_groups_0"), val = int32(1)]; tensor var_5131_to_fp16 = const()[name = string("op_5131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519211136)))]; tensor hidden_states_105_cast_fp16 = conv(dilations = hidden_states_105_dilations_0, groups = hidden_states_105_groups_0, pad = hidden_states_105_pad_0, pad_type = hidden_states_105_pad_type_0, strides = hidden_states_105_strides_0, weight = var_5131_to_fp16, x = x_311_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor x_313_cast_fp16 = add(x = x_305_cast_fp16, y = hidden_states_105_cast_fp16)[name = string("x_313_cast_fp16")]; int32 var_5143 = const()[name = string("op_5143"), val = int32(1)]; fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5146_cast_fp16 = mul(x = x_313_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_5146_cast_fp16")]; bool x_315_interleave_0 = const()[name = string("x_315_interleave_0"), val = bool(false)]; tensor x_315_cast_fp16 = concat(axis = var_5143, interleave = x_315_interleave_0, values = (x_313_cast_fp16, var_5146_cast_fp16))[name = string("x_315_cast_fp16")]; tensor out_211_axes_0 = const()[name = string("out_211_axes_0"), val = tensor([1])]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_211_cast_fp16 = layer_norm(axes = out_211_axes_0, epsilon = var_5156_to_fp16, x = x_315_cast_fp16)[name = string("out_211_cast_fp16")]; tensor layer_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521308352)))]; tensor out_213_cast_fp16 = mul(x = out_211_cast_fp16, y = layer_layers_17_post_attention_layernorm_weight_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_5162_split_sizes_0 = const()[name = string("op_5162_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5162_axis_0 = const()[name = string("op_5162_axis_0"), val = int32(1)]; tensor var_5162_cast_fp16_0, tensor var_5162_cast_fp16_1 = split(axis = var_5162_axis_0, split_sizes = var_5162_split_sizes_0, x = out_213_cast_fp16)[name = string("op_5162_cast_fp16")]; string input_35_pad_type_0 = const()[name = string("input_35_pad_type_0"), val = string("valid")]; tensor input_35_strides_0 = const()[name = string("input_35_strides_0"), val = tensor([1, 1])]; tensor input_35_pad_0 = const()[name = string("input_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_35_dilations_0 = const()[name = string("input_35_dilations_0"), val = tensor([1, 1])]; int32 input_35_groups_0 = const()[name = string("input_35_groups_0"), val = int32(1)]; tensor var_5167_to_fp16 = const()[name = string("op_5167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521312512)))]; tensor input_35_cast_fp16 = conv(dilations = input_35_dilations_0, groups = input_35_groups_0, pad = input_35_pad_0, pad_type = input_35_pad_type_0, strides = input_35_strides_0, weight = var_5167_to_fp16, x = var_5162_cast_fp16_0)[name = string("input_35_cast_fp16")]; tensor var_5178_cast_fp16 = silu(x = input_35_cast_fp16)[name = string("op_5178_cast_fp16")]; string var_5183_pad_type_0 = const()[name = string("op_5183_pad_type_0"), val = string("valid")]; tensor var_5183_strides_0 = const()[name = string("op_5183_strides_0"), val = tensor([1, 1])]; tensor var_5183_pad_0 = const()[name = string("op_5183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5183_dilations_0 = const()[name = string("op_5183_dilations_0"), val = tensor([1, 1])]; int32 var_5183_groups_0 = const()[name = string("op_5183_groups_0"), val = int32(1)]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529701184)))]; tensor var_5183_cast_fp16 = conv(dilations = var_5183_dilations_0, groups = var_5183_groups_0, pad = var_5183_pad_0, pad_type = var_5183_pad_type_0, strides = var_5183_strides_0, weight = var_5166_to_fp16, x = var_5162_cast_fp16_0)[name = string("op_5183_cast_fp16")]; tensor x_321_cast_fp16 = mul(x = var_5178_cast_fp16, y = var_5183_cast_fp16)[name = string("x_321_cast_fp16")]; string hidden_states_107_pad_type_0 = const()[name = string("hidden_states_107_pad_type_0"), val = string("valid")]; tensor hidden_states_107_strides_0 = const()[name = string("hidden_states_107_strides_0"), val = tensor([1, 1])]; tensor hidden_states_107_pad_0 = const()[name = string("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_107_dilations_0 = const()[name = string("hidden_states_107_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_107_groups_0 = const()[name = string("hidden_states_107_groups_0"), val = int32(1)]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538089856)))]; tensor hidden_states_107_cast_fp16 = conv(dilations = hidden_states_107_dilations_0, groups = hidden_states_107_groups_0, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = hidden_states_107_strides_0, weight = var_5165_to_fp16, x = x_321_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor x_323_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_107_cast_fp16)[name = string("x_323_cast_fp16")]; int32 var_5196 = const()[name = string("op_5196"), val = int32(1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5199_cast_fp16 = mul(x = x_323_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_5199_cast_fp16")]; bool x_325_interleave_0 = const()[name = string("x_325_interleave_0"), val = bool(false)]; tensor x_325_cast_fp16 = concat(axis = var_5196, interleave = x_325_interleave_0, values = (x_323_cast_fp16, var_5199_cast_fp16))[name = string("x_325_cast_fp16")]; tensor out_217_axes_0 = const()[name = string("out_217_axes_0"), val = tensor([1])]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_217_cast_fp16 = layer_norm(axes = out_217_axes_0, epsilon = var_5209_to_fp16, x = x_325_cast_fp16)[name = string("out_217_cast_fp16")]; tensor layer_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546478528)))]; tensor out_219_cast_fp16 = mul(x = out_217_cast_fp16, y = layer_layers_18_input_layernorm_weight_to_fp16)[name = string("out_219_cast_fp16")]; tensor var_5215_split_sizes_0 = const()[name = string("op_5215_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5215_axis_0 = const()[name = string("op_5215_axis_0"), val = int32(1)]; tensor var_5215_cast_fp16_0, tensor var_5215_cast_fp16_1 = split(axis = var_5215_axis_0, split_sizes = var_5215_split_sizes_0, x = out_219_cast_fp16)[name = string("op_5215_cast_fp16")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546482688)))]; tensor query_states_73_cast_fp16 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = var_5237_to_fp16, x = var_5215_cast_fp16_0)[name = string("query_states_73_cast_fp16")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548579904)))]; tensor key_states_73_cast_fp16 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = var_5248_to_fp16, x = var_5215_cast_fp16_0)[name = string("key_states_73_cast_fp16")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor var_5259_to_fp16 = const()[name = string("op_5259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548842112)))]; tensor value_states_73_cast_fp16 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = var_5259_to_fp16, x = var_5215_cast_fp16_0)[name = string("value_states_73_cast_fp16")]; tensor var_5267 = const()[name = string("op_5267"), val = tensor([1, 16, 64, 32])]; tensor embed_73_cast_fp16 = reshape(shape = var_5267, x = query_states_73_cast_fp16)[name = string("embed_73_cast_fp16")]; tensor var_5271 = const()[name = string("op_5271"), val = tensor([1, 2, 64, 32])]; tensor var_5272_cast_fp16 = reshape(shape = var_5271, x = key_states_73_cast_fp16)[name = string("op_5272_cast_fp16")]; tensor embed_75_perm_0 = const()[name = string("embed_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 64, 32])]; tensor var_5279_cast_fp16 = reshape(shape = var_5278, x = value_states_73_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor value_states_75_perm_0 = const()[name = string("value_states_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5283_cast_fp16 = mul(x = embed_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor var_5284_split_sizes_0 = const()[name = string("op_5284_split_sizes_0"), val = tensor([32, 32])]; int32 var_5284_axis_0 = const()[name = string("op_5284_axis_0"), val = int32(-2)]; tensor var_5284_cast_fp16_0, tensor var_5284_cast_fp16_1 = split(axis = var_5284_axis_0, split_sizes = var_5284_split_sizes_0, x = embed_73_cast_fp16)[name = string("op_5284_cast_fp16")]; fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5286_cast_fp16 = mul(x = var_5284_cast_fp16_1, y = const_187_promoted_to_fp16)[name = string("op_5286_cast_fp16")]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-2)]; bool var_5289_interleave_0 = const()[name = string("op_5289_interleave_0"), val = bool(false)]; tensor var_5289_cast_fp16 = concat(axis = var_5288, interleave = var_5289_interleave_0, values = (var_5286_cast_fp16, var_5284_cast_fp16_0))[name = string("op_5289_cast_fp16")]; tensor var_5290_cast_fp16 = mul(x = var_5289_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5290_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_5283_cast_fp16, y = var_5290_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor embed_75_cast_fp16 = transpose(perm = embed_75_perm_0, x = var_5272_cast_fp16)[name = string("transpose_17")]; tensor var_5293_cast_fp16 = mul(x = embed_75_cast_fp16, y = cos_cast_fp16)[name = string("op_5293_cast_fp16")]; tensor var_5294_split_sizes_0 = const()[name = string("op_5294_split_sizes_0"), val = tensor([32, 32])]; int32 var_5294_axis_0 = const()[name = string("op_5294_axis_0"), val = int32(-1)]; tensor var_5294_cast_fp16_0, tensor var_5294_cast_fp16_1 = split(axis = var_5294_axis_0, split_sizes = var_5294_split_sizes_0, x = embed_75_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5296_cast_fp16 = mul(x = var_5294_cast_fp16_1, y = const_188_promoted_to_fp16)[name = string("op_5296_cast_fp16")]; int32 var_5298 = const()[name = string("op_5298"), val = int32(-1)]; bool var_5299_interleave_0 = const()[name = string("op_5299_interleave_0"), val = bool(false)]; tensor var_5299_cast_fp16 = concat(axis = var_5298, interleave = var_5299_interleave_0, values = (var_5296_cast_fp16, var_5294_cast_fp16_0))[name = string("op_5299_cast_fp16")]; tensor var_5300_cast_fp16 = mul(x = var_5299_cast_fp16, y = sin_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor key_states_75_cast_fp16 = add(x = var_5293_cast_fp16, y = var_5300_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([18])]; tensor expand_dims_182 = const()[name = string("expand_dims_182"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([19])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_181, expand_dims_182, position_id, concat_147_values3_0))[name = string("concat_147")]; tensor concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = tensor([0])]; tensor concat_148_values3_0 = const()[name = string("concat_148_values3_0"), val = tensor([0])]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (expand_dims_184, concat_148_values1_0, var_426, concat_148_values3_0))[name = string("concat_148")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = key_states_75_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_346_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_346")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75_cast_fp16 = transpose(perm = value_states_75_perm_0, x = var_5279_cast_fp16)[name = string("transpose_16")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = value_states_75_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_347_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_347")]; tensor var_5343_begin_0 = const()[name = string("op_5343_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5343_end_0 = const()[name = string("op_5343_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5343_end_mask_0 = const()[name = string("op_5343_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5343_cast_fp16 = slice_by_index(begin = var_5343_begin_0, end = var_5343_end_0, end_mask = var_5343_end_mask_0, x = coreml_update_state_84)[name = string("op_5343_cast_fp16")]; tensor tile_36 = const()[name = string("tile_36"), val = tensor([1, 1])]; int32 var_5346_axis_0 = const()[name = string("op_5346_axis_0"), val = int32(1)]; tensor var_5346_cast_fp16_0, tensor var_5346_cast_fp16_1 = split(axis = var_5346_axis_0, split_sizes = tile_36, x = var_5343_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = coreml_update_state_85)[name = string("op_5353_cast_fp16")]; tensor tile_37 = const()[name = string("tile_37"), val = tensor([1, 1])]; int32 var_5356_axis_0 = const()[name = string("op_5356_axis_0"), val = int32(1)]; tensor var_5356_cast_fp16_0, tensor var_5356_cast_fp16_1 = split(axis = var_5356_axis_0, split_sizes = tile_37, x = var_5353_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_split_sizes_0 = const()[name = string("op_5359_split_sizes_0"), val = tensor([8, 8])]; int32 var_5359_axis_0 = const()[name = string("op_5359_axis_0"), val = int32(1)]; tensor var_5359_cast_fp16_0, tensor var_5359_cast_fp16_1 = split(axis = var_5359_axis_0, split_sizes = var_5359_split_sizes_0, x = query_states_75_cast_fp16)[name = string("op_5359_cast_fp16")]; bool attn_weights_289_transpose_x_0 = const()[name = string("attn_weights_289_transpose_x_0"), val = bool(false)]; bool attn_weights_289_transpose_y_0 = const()[name = string("attn_weights_289_transpose_y_0"), val = bool(false)]; tensor attn_weights_289_cast_fp16 = matmul(transpose_x = attn_weights_289_transpose_x_0, transpose_y = attn_weights_289_transpose_y_0, x = var_5346_cast_fp16_0, y = var_5359_cast_fp16_0)[name = string("attn_weights_289_cast_fp16")]; fp16 _inversed_attn_weights_291_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_291_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_291_cast_fp16 = mul(x = attn_weights_289_cast_fp16, y = _inversed_attn_weights_291_y_0_to_fp16)[name = string("_inversed_attn_weights_291_cast_fp16")]; tensor attn_weights_293_cast_fp16 = add(x = _inversed_attn_weights_291_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_293_cast_fp16")]; int32 var_5366 = const()[name = string("op_5366"), val = int32(2)]; tensor attn_weights_295_cast_fp16 = softmax(axis = var_5366, x = attn_weights_293_cast_fp16)[name = string("attn_weights_295_cast_fp16")]; bool var_5372_transpose_x_1 = const()[name = string("op_5372_transpose_x_1"), val = bool(true)]; bool var_5372_transpose_y_1 = const()[name = string("op_5372_transpose_y_1"), val = bool(false)]; tensor var_5372_cast_fp16 = matmul(transpose_x = var_5372_transpose_x_1, transpose_y = var_5372_transpose_y_1, x = attn_weights_295_cast_fp16, y = var_5356_cast_fp16_0)[name = string("op_5372_cast_fp16")]; bool attn_weights_297_transpose_x_0 = const()[name = string("attn_weights_297_transpose_x_0"), val = bool(false)]; bool attn_weights_297_transpose_y_0 = const()[name = string("attn_weights_297_transpose_y_0"), val = bool(false)]; tensor attn_weights_297_cast_fp16 = matmul(transpose_x = attn_weights_297_transpose_x_0, transpose_y = attn_weights_297_transpose_y_0, x = var_5346_cast_fp16_1, y = var_5359_cast_fp16_1)[name = string("attn_weights_297_cast_fp16")]; fp16 _inversed_attn_weights_299_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_299_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_299_cast_fp16 = mul(x = attn_weights_297_cast_fp16, y = _inversed_attn_weights_299_y_0_to_fp16)[name = string("_inversed_attn_weights_299_cast_fp16")]; tensor attn_weights_301_cast_fp16 = add(x = _inversed_attn_weights_299_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_301_cast_fp16")]; int32 var_5378 = const()[name = string("op_5378"), val = int32(2)]; tensor attn_weights_303_cast_fp16 = softmax(axis = var_5378, x = attn_weights_301_cast_fp16)[name = string("attn_weights_303_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(true)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_303_cast_fp16, y = var_5356_cast_fp16_1)[name = string("attn_output_109_cast_fp16")]; int32 var_5386 = const()[name = string("op_5386"), val = int32(1)]; bool attn_output_111_interleave_0 = const()[name = string("attn_output_111_interleave_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = concat(axis = var_5386, interleave = attn_output_111_interleave_0, values = (var_5372_cast_fp16, attn_output_109_cast_fp16))[name = string("attn_output_111_cast_fp16")]; tensor var_5390_perm_0 = const()[name = string("op_5390_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5395 = const()[name = string("op_5395"), val = tensor([1, 1024, 1, 32])]; tensor var_5390_cast_fp16 = transpose(perm = var_5390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_15")]; tensor x_329_cast_fp16 = reshape(shape = var_5395, x = var_5390_cast_fp16)[name = string("x_329_cast_fp16")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor var_5402_to_fp16 = const()[name = string("op_5402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549104320)))]; tensor hidden_states_111_cast_fp16 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = var_5402_to_fp16, x = x_329_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_323_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("x_331_cast_fp16")]; int32 var_5414 = const()[name = string("op_5414"), val = int32(1)]; fp16 const_193_promoted_to_fp16 = const()[name = string("const_193_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5417_cast_fp16 = mul(x = x_331_cast_fp16, y = const_193_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool x_333_interleave_0 = const()[name = string("x_333_interleave_0"), val = bool(false)]; tensor x_333_cast_fp16 = concat(axis = var_5414, interleave = x_333_interleave_0, values = (x_331_cast_fp16, var_5417_cast_fp16))[name = string("x_333_cast_fp16")]; tensor out_223_axes_0 = const()[name = string("out_223_axes_0"), val = tensor([1])]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_223_cast_fp16 = layer_norm(axes = out_223_axes_0, epsilon = var_5427_to_fp16, x = x_333_cast_fp16)[name = string("out_223_cast_fp16")]; tensor layer_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551201536)))]; tensor out_225_cast_fp16 = mul(x = out_223_cast_fp16, y = layer_layers_18_post_attention_layernorm_weight_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(1)]; tensor var_5433_cast_fp16_0, tensor var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = out_225_cast_fp16)[name = string("op_5433_cast_fp16")]; string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551205696)))]; tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = var_5438_to_fp16, x = var_5433_cast_fp16_0)[name = string("input_37_cast_fp16")]; tensor var_5449_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_5449_cast_fp16")]; string var_5454_pad_type_0 = const()[name = string("op_5454_pad_type_0"), val = string("valid")]; tensor var_5454_strides_0 = const()[name = string("op_5454_strides_0"), val = tensor([1, 1])]; tensor var_5454_pad_0 = const()[name = string("op_5454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_dilations_0 = const()[name = string("op_5454_dilations_0"), val = tensor([1, 1])]; int32 var_5454_groups_0 = const()[name = string("op_5454_groups_0"), val = int32(1)]; tensor var_5437_to_fp16 = const()[name = string("op_5437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559594368)))]; tensor var_5454_cast_fp16 = conv(dilations = var_5454_dilations_0, groups = var_5454_groups_0, pad = var_5454_pad_0, pad_type = var_5454_pad_type_0, strides = var_5454_strides_0, weight = var_5437_to_fp16, x = var_5433_cast_fp16_0)[name = string("op_5454_cast_fp16")]; tensor x_339_cast_fp16 = mul(x = var_5449_cast_fp16, y = var_5454_cast_fp16)[name = string("x_339_cast_fp16")]; string hidden_states_113_pad_type_0 = const()[name = string("hidden_states_113_pad_type_0"), val = string("valid")]; tensor hidden_states_113_strides_0 = const()[name = string("hidden_states_113_strides_0"), val = tensor([1, 1])]; tensor hidden_states_113_pad_0 = const()[name = string("hidden_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_113_dilations_0 = const()[name = string("hidden_states_113_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_113_groups_0 = const()[name = string("hidden_states_113_groups_0"), val = int32(1)]; tensor var_5436_to_fp16 = const()[name = string("op_5436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567983040)))]; tensor hidden_states_113_cast_fp16 = conv(dilations = hidden_states_113_dilations_0, groups = hidden_states_113_groups_0, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = hidden_states_113_strides_0, weight = var_5436_to_fp16, x = x_339_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor x_341_cast_fp16 = add(x = x_331_cast_fp16, y = hidden_states_113_cast_fp16)[name = string("x_341_cast_fp16")]; int32 var_5467 = const()[name = string("op_5467"), val = int32(1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5470_cast_fp16 = mul(x = x_341_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_5470_cast_fp16")]; bool x_343_interleave_0 = const()[name = string("x_343_interleave_0"), val = bool(false)]; tensor x_343_cast_fp16 = concat(axis = var_5467, interleave = x_343_interleave_0, values = (x_341_cast_fp16, var_5470_cast_fp16))[name = string("x_343_cast_fp16")]; tensor out_229_axes_0 = const()[name = string("out_229_axes_0"), val = tensor([1])]; fp16 var_5480_to_fp16 = const()[name = string("op_5480_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_229_cast_fp16 = layer_norm(axes = out_229_axes_0, epsilon = var_5480_to_fp16, x = x_343_cast_fp16)[name = string("out_229_cast_fp16")]; tensor layer_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576371712)))]; tensor out_231_cast_fp16 = mul(x = out_229_cast_fp16, y = layer_layers_19_input_layernorm_weight_to_fp16)[name = string("out_231_cast_fp16")]; tensor var_5486_split_sizes_0 = const()[name = string("op_5486_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5486_axis_0 = const()[name = string("op_5486_axis_0"), val = int32(1)]; tensor var_5486_cast_fp16_0, tensor var_5486_cast_fp16_1 = split(axis = var_5486_axis_0, split_sizes = var_5486_split_sizes_0, x = out_231_cast_fp16)[name = string("op_5486_cast_fp16")]; string query_states_77_pad_type_0 = const()[name = string("query_states_77_pad_type_0"), val = string("valid")]; tensor query_states_77_strides_0 = const()[name = string("query_states_77_strides_0"), val = tensor([1, 1])]; tensor query_states_77_pad_0 = const()[name = string("query_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_77_dilations_0 = const()[name = string("query_states_77_dilations_0"), val = tensor([1, 1])]; int32 query_states_77_groups_0 = const()[name = string("query_states_77_groups_0"), val = int32(1)]; tensor var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576375872)))]; tensor query_states_77_cast_fp16 = conv(dilations = query_states_77_dilations_0, groups = query_states_77_groups_0, pad = query_states_77_pad_0, pad_type = query_states_77_pad_type_0, strides = query_states_77_strides_0, weight = var_5508_to_fp16, x = var_5486_cast_fp16_0)[name = string("query_states_77_cast_fp16")]; string key_states_77_pad_type_0 = const()[name = string("key_states_77_pad_type_0"), val = string("valid")]; tensor key_states_77_strides_0 = const()[name = string("key_states_77_strides_0"), val = tensor([1, 1])]; tensor key_states_77_pad_0 = const()[name = string("key_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_77_dilations_0 = const()[name = string("key_states_77_dilations_0"), val = tensor([1, 1])]; int32 key_states_77_groups_0 = const()[name = string("key_states_77_groups_0"), val = int32(1)]; tensor var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578473088)))]; tensor key_states_77_cast_fp16 = conv(dilations = key_states_77_dilations_0, groups = key_states_77_groups_0, pad = key_states_77_pad_0, pad_type = key_states_77_pad_type_0, strides = key_states_77_strides_0, weight = var_5519_to_fp16, x = var_5486_cast_fp16_0)[name = string("key_states_77_cast_fp16")]; string value_states_77_pad_type_0 = const()[name = string("value_states_77_pad_type_0"), val = string("valid")]; tensor value_states_77_strides_0 = const()[name = string("value_states_77_strides_0"), val = tensor([1, 1])]; tensor value_states_77_pad_0 = const()[name = string("value_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_77_dilations_0 = const()[name = string("value_states_77_dilations_0"), val = tensor([1, 1])]; int32 value_states_77_groups_0 = const()[name = string("value_states_77_groups_0"), val = int32(1)]; tensor var_5530_to_fp16 = const()[name = string("op_5530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578735296)))]; tensor value_states_77_cast_fp16 = conv(dilations = value_states_77_dilations_0, groups = value_states_77_groups_0, pad = value_states_77_pad_0, pad_type = value_states_77_pad_type_0, strides = value_states_77_strides_0, weight = var_5530_to_fp16, x = var_5486_cast_fp16_0)[name = string("value_states_77_cast_fp16")]; tensor var_5538 = const()[name = string("op_5538"), val = tensor([1, 16, 64, 32])]; tensor embed_77_cast_fp16 = reshape(shape = var_5538, x = query_states_77_cast_fp16)[name = string("embed_77_cast_fp16")]; tensor var_5542 = const()[name = string("op_5542"), val = tensor([1, 2, 64, 32])]; tensor var_5543_cast_fp16 = reshape(shape = var_5542, x = key_states_77_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor embed_79_perm_0 = const()[name = string("embed_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([1, 2, 64, 32])]; tensor var_5550_cast_fp16 = reshape(shape = var_5549, x = value_states_77_cast_fp16)[name = string("op_5550_cast_fp16")]; tensor value_states_79_perm_0 = const()[name = string("value_states_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5554_cast_fp16 = mul(x = embed_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5554_cast_fp16")]; tensor var_5555_split_sizes_0 = const()[name = string("op_5555_split_sizes_0"), val = tensor([32, 32])]; int32 var_5555_axis_0 = const()[name = string("op_5555_axis_0"), val = int32(-2)]; tensor var_5555_cast_fp16_0, tensor var_5555_cast_fp16_1 = split(axis = var_5555_axis_0, split_sizes = var_5555_split_sizes_0, x = embed_77_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 const_197_promoted_to_fp16 = const()[name = string("const_197_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5557_cast_fp16 = mul(x = var_5555_cast_fp16_1, y = const_197_promoted_to_fp16)[name = string("op_5557_cast_fp16")]; int32 var_5559 = const()[name = string("op_5559"), val = int32(-2)]; bool var_5560_interleave_0 = const()[name = string("op_5560_interleave_0"), val = bool(false)]; tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5557_cast_fp16, var_5555_cast_fp16_0))[name = string("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = mul(x = var_5560_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5561_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_5554_cast_fp16, y = var_5561_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor embed_79_cast_fp16 = transpose(perm = embed_79_perm_0, x = var_5543_cast_fp16)[name = string("transpose_14")]; tensor var_5564_cast_fp16 = mul(x = embed_79_cast_fp16, y = cos_cast_fp16)[name = string("op_5564_cast_fp16")]; tensor var_5565_split_sizes_0 = const()[name = string("op_5565_split_sizes_0"), val = tensor([32, 32])]; int32 var_5565_axis_0 = const()[name = string("op_5565_axis_0"), val = int32(-1)]; tensor var_5565_cast_fp16_0, tensor var_5565_cast_fp16_1 = split(axis = var_5565_axis_0, split_sizes = var_5565_split_sizes_0, x = embed_79_cast_fp16)[name = string("op_5565_cast_fp16")]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5567_cast_fp16 = mul(x = var_5565_cast_fp16_1, y = const_198_promoted_to_fp16)[name = string("op_5567_cast_fp16")]; int32 var_5569 = const()[name = string("op_5569"), val = int32(-1)]; bool var_5570_interleave_0 = const()[name = string("op_5570_interleave_0"), val = bool(false)]; tensor var_5570_cast_fp16 = concat(axis = var_5569, interleave = var_5570_interleave_0, values = (var_5567_cast_fp16, var_5565_cast_fp16_0))[name = string("op_5570_cast_fp16")]; tensor var_5571_cast_fp16 = mul(x = var_5570_cast_fp16, y = sin_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor key_states_79_cast_fp16 = add(x = var_5564_cast_fp16, y = var_5571_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor expand_dims_191 = const()[name = string("expand_dims_191"), val = tensor([19])]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([20])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_191, expand_dims_192, position_id, concat_155_values3_0))[name = string("concat_155")]; tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (expand_dims_194, concat_156_values1_0, var_426, concat_156_values3_0))[name = string("concat_156")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = key_states_79_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_348_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_348")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_79_cast_fp16 = transpose(perm = value_states_79_perm_0, x = var_5550_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = value_states_79_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_349_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_349")]; tensor var_5614_begin_0 = const()[name = string("op_5614_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5614_end_0 = const()[name = string("op_5614_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5614_end_mask_0 = const()[name = string("op_5614_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5614_cast_fp16 = slice_by_index(begin = var_5614_begin_0, end = var_5614_end_0, end_mask = var_5614_end_mask_0, x = coreml_update_state_86)[name = string("op_5614_cast_fp16")]; tensor tile_38 = const()[name = string("tile_38"), val = tensor([1, 1])]; int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(1)]; tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = tile_38, x = var_5614_cast_fp16)[name = string("op_5617_cast_fp16")]; tensor var_5624_begin_0 = const()[name = string("op_5624_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5624_end_0 = const()[name = string("op_5624_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5624_end_mask_0 = const()[name = string("op_5624_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = coreml_update_state_87)[name = string("op_5624_cast_fp16")]; tensor tile_39 = const()[name = string("tile_39"), val = tensor([1, 1])]; int32 var_5627_axis_0 = const()[name = string("op_5627_axis_0"), val = int32(1)]; tensor var_5627_cast_fp16_0, tensor var_5627_cast_fp16_1 = split(axis = var_5627_axis_0, split_sizes = tile_39, x = var_5624_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5630_split_sizes_0 = const()[name = string("op_5630_split_sizes_0"), val = tensor([8, 8])]; int32 var_5630_axis_0 = const()[name = string("op_5630_axis_0"), val = int32(1)]; tensor var_5630_cast_fp16_0, tensor var_5630_cast_fp16_1 = split(axis = var_5630_axis_0, split_sizes = var_5630_split_sizes_0, x = query_states_79_cast_fp16)[name = string("op_5630_cast_fp16")]; bool attn_weights_305_transpose_x_0 = const()[name = string("attn_weights_305_transpose_x_0"), val = bool(false)]; bool attn_weights_305_transpose_y_0 = const()[name = string("attn_weights_305_transpose_y_0"), val = bool(false)]; tensor attn_weights_305_cast_fp16 = matmul(transpose_x = attn_weights_305_transpose_x_0, transpose_y = attn_weights_305_transpose_y_0, x = var_5617_cast_fp16_0, y = var_5630_cast_fp16_0)[name = string("attn_weights_305_cast_fp16")]; fp16 _inversed_attn_weights_307_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_307_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_307_cast_fp16 = mul(x = attn_weights_305_cast_fp16, y = _inversed_attn_weights_307_y_0_to_fp16)[name = string("_inversed_attn_weights_307_cast_fp16")]; tensor attn_weights_309_cast_fp16 = add(x = _inversed_attn_weights_307_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_309_cast_fp16")]; int32 var_5637 = const()[name = string("op_5637"), val = int32(2)]; tensor attn_weights_311_cast_fp16 = softmax(axis = var_5637, x = attn_weights_309_cast_fp16)[name = string("attn_weights_311_cast_fp16")]; bool var_5643_transpose_x_1 = const()[name = string("op_5643_transpose_x_1"), val = bool(true)]; bool var_5643_transpose_y_1 = const()[name = string("op_5643_transpose_y_1"), val = bool(false)]; tensor var_5643_cast_fp16 = matmul(transpose_x = var_5643_transpose_x_1, transpose_y = var_5643_transpose_y_1, x = attn_weights_311_cast_fp16, y = var_5627_cast_fp16_0)[name = string("op_5643_cast_fp16")]; bool attn_weights_313_transpose_x_0 = const()[name = string("attn_weights_313_transpose_x_0"), val = bool(false)]; bool attn_weights_313_transpose_y_0 = const()[name = string("attn_weights_313_transpose_y_0"), val = bool(false)]; tensor attn_weights_313_cast_fp16 = matmul(transpose_x = attn_weights_313_transpose_x_0, transpose_y = attn_weights_313_transpose_y_0, x = var_5617_cast_fp16_1, y = var_5630_cast_fp16_1)[name = string("attn_weights_313_cast_fp16")]; fp16 _inversed_attn_weights_315_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_315_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_315_cast_fp16 = mul(x = attn_weights_313_cast_fp16, y = _inversed_attn_weights_315_y_0_to_fp16)[name = string("_inversed_attn_weights_315_cast_fp16")]; tensor attn_weights_317_cast_fp16 = add(x = _inversed_attn_weights_315_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_317_cast_fp16")]; int32 var_5649 = const()[name = string("op_5649"), val = int32(2)]; tensor attn_weights_319_cast_fp16 = softmax(axis = var_5649, x = attn_weights_317_cast_fp16)[name = string("attn_weights_319_cast_fp16")]; bool attn_output_115_transpose_x_1 = const()[name = string("attn_output_115_transpose_x_1"), val = bool(true)]; bool attn_output_115_transpose_y_1 = const()[name = string("attn_output_115_transpose_y_1"), val = bool(false)]; tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_1, transpose_y = attn_output_115_transpose_y_1, x = attn_weights_319_cast_fp16, y = var_5627_cast_fp16_1)[name = string("attn_output_115_cast_fp16")]; int32 var_5657 = const()[name = string("op_5657"), val = int32(1)]; bool attn_output_117_interleave_0 = const()[name = string("attn_output_117_interleave_0"), val = bool(false)]; tensor attn_output_117_cast_fp16 = concat(axis = var_5657, interleave = attn_output_117_interleave_0, values = (var_5643_cast_fp16, attn_output_115_cast_fp16))[name = string("attn_output_117_cast_fp16")]; tensor var_5661_perm_0 = const()[name = string("op_5661_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 1024, 1, 32])]; tensor var_5661_cast_fp16 = transpose(perm = var_5661_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_12")]; tensor x_347_cast_fp16 = reshape(shape = var_5666, x = var_5661_cast_fp16)[name = string("x_347_cast_fp16")]; string hidden_states_117_pad_type_0 = const()[name = string("hidden_states_117_pad_type_0"), val = string("valid")]; tensor hidden_states_117_strides_0 = const()[name = string("hidden_states_117_strides_0"), val = tensor([1, 1])]; tensor hidden_states_117_pad_0 = const()[name = string("hidden_states_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_117_dilations_0 = const()[name = string("hidden_states_117_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_117_groups_0 = const()[name = string("hidden_states_117_groups_0"), val = int32(1)]; tensor var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578997504)))]; tensor hidden_states_117_cast_fp16 = conv(dilations = hidden_states_117_dilations_0, groups = hidden_states_117_groups_0, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = hidden_states_117_strides_0, weight = var_5673_to_fp16, x = x_347_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor x_349_cast_fp16 = add(x = x_341_cast_fp16, y = hidden_states_117_cast_fp16)[name = string("x_349_cast_fp16")]; int32 var_5685 = const()[name = string("op_5685"), val = int32(1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5688_cast_fp16 = mul(x = x_349_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5688_cast_fp16")]; bool x_351_interleave_0 = const()[name = string("x_351_interleave_0"), val = bool(false)]; tensor x_351_cast_fp16 = concat(axis = var_5685, interleave = x_351_interleave_0, values = (x_349_cast_fp16, var_5688_cast_fp16))[name = string("x_351_cast_fp16")]; tensor out_235_axes_0 = const()[name = string("out_235_axes_0"), val = tensor([1])]; fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_235_cast_fp16 = layer_norm(axes = out_235_axes_0, epsilon = var_5698_to_fp16, x = x_351_cast_fp16)[name = string("out_235_cast_fp16")]; tensor layer_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581094720)))]; tensor out_237_cast_fp16 = mul(x = out_235_cast_fp16, y = layer_layers_19_post_attention_layernorm_weight_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(1)]; tensor var_5704_cast_fp16_0, tensor var_5704_cast_fp16_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = out_237_cast_fp16)[name = string("op_5704_cast_fp16")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581098880)))]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = var_5709_to_fp16, x = var_5704_cast_fp16_0)[name = string("input_39_cast_fp16")]; tensor var_5720_cast_fp16 = silu(x = input_39_cast_fp16)[name = string("op_5720_cast_fp16")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5708_to_fp16 = const()[name = string("op_5708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589487552)))]; tensor var_5725_cast_fp16 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = var_5708_to_fp16, x = var_5704_cast_fp16_0)[name = string("op_5725_cast_fp16")]; tensor x_357_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5725_cast_fp16)[name = string("x_357_cast_fp16")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor var_5707_to_fp16 = const()[name = string("op_5707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597876224)))]; tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = var_5707_to_fp16, x = x_357_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor x_359_cast_fp16 = add(x = x_349_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("x_359_cast_fp16")]; int32 var_5738 = const()[name = string("op_5738"), val = int32(1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5741_cast_fp16 = mul(x = x_359_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; bool x_361_interleave_0 = const()[name = string("x_361_interleave_0"), val = bool(false)]; tensor x_361_cast_fp16 = concat(axis = var_5738, interleave = x_361_interleave_0, values = (x_359_cast_fp16, var_5741_cast_fp16))[name = string("x_361_cast_fp16")]; tensor out_241_axes_0 = const()[name = string("out_241_axes_0"), val = tensor([1])]; fp16 var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_241_cast_fp16 = layer_norm(axes = out_241_axes_0, epsilon = var_5751_to_fp16, x = x_361_cast_fp16)[name = string("out_241_cast_fp16")]; tensor layer_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606264896)))]; tensor out_243_cast_fp16 = mul(x = out_241_cast_fp16, y = layer_layers_20_input_layernorm_weight_to_fp16)[name = string("out_243_cast_fp16")]; tensor var_5757_split_sizes_0 = const()[name = string("op_5757_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5757_axis_0 = const()[name = string("op_5757_axis_0"), val = int32(1)]; tensor var_5757_cast_fp16_0, tensor var_5757_cast_fp16_1 = split(axis = var_5757_axis_0, split_sizes = var_5757_split_sizes_0, x = out_243_cast_fp16)[name = string("op_5757_cast_fp16")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor var_5779_to_fp16 = const()[name = string("op_5779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606269056)))]; tensor query_states_81_cast_fp16 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = var_5779_to_fp16, x = var_5757_cast_fp16_0)[name = string("query_states_81_cast_fp16")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor var_5790_to_fp16 = const()[name = string("op_5790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608366272)))]; tensor key_states_81_cast_fp16 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = var_5790_to_fp16, x = var_5757_cast_fp16_0)[name = string("key_states_81_cast_fp16")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608628480)))]; tensor value_states_81_cast_fp16 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = var_5801_to_fp16, x = var_5757_cast_fp16_0)[name = string("value_states_81_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 16, 64, 32])]; tensor embed_81_cast_fp16 = reshape(shape = var_5809, x = query_states_81_cast_fp16)[name = string("embed_81_cast_fp16")]; tensor var_5813 = const()[name = string("op_5813"), val = tensor([1, 2, 64, 32])]; tensor var_5814_cast_fp16 = reshape(shape = var_5813, x = key_states_81_cast_fp16)[name = string("op_5814_cast_fp16")]; tensor embed_83_perm_0 = const()[name = string("embed_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, 2, 64, 32])]; tensor var_5821_cast_fp16 = reshape(shape = var_5820, x = value_states_81_cast_fp16)[name = string("op_5821_cast_fp16")]; tensor value_states_83_perm_0 = const()[name = string("value_states_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5825_cast_fp16 = mul(x = embed_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5825_cast_fp16")]; tensor var_5826_split_sizes_0 = const()[name = string("op_5826_split_sizes_0"), val = tensor([32, 32])]; int32 var_5826_axis_0 = const()[name = string("op_5826_axis_0"), val = int32(-2)]; tensor var_5826_cast_fp16_0, tensor var_5826_cast_fp16_1 = split(axis = var_5826_axis_0, split_sizes = var_5826_split_sizes_0, x = embed_81_cast_fp16)[name = string("op_5826_cast_fp16")]; fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5828_cast_fp16 = mul(x = var_5826_cast_fp16_1, y = const_207_promoted_to_fp16)[name = string("op_5828_cast_fp16")]; int32 var_5830 = const()[name = string("op_5830"), val = int32(-2)]; bool var_5831_interleave_0 = const()[name = string("op_5831_interleave_0"), val = bool(false)]; tensor var_5831_cast_fp16 = concat(axis = var_5830, interleave = var_5831_interleave_0, values = (var_5828_cast_fp16, var_5826_cast_fp16_0))[name = string("op_5831_cast_fp16")]; tensor var_5832_cast_fp16 = mul(x = var_5831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5832_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_5825_cast_fp16, y = var_5832_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor embed_83_cast_fp16 = transpose(perm = embed_83_perm_0, x = var_5814_cast_fp16)[name = string("transpose_11")]; tensor var_5835_cast_fp16 = mul(x = embed_83_cast_fp16, y = cos_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5836_split_sizes_0 = const()[name = string("op_5836_split_sizes_0"), val = tensor([32, 32])]; int32 var_5836_axis_0 = const()[name = string("op_5836_axis_0"), val = int32(-1)]; tensor var_5836_cast_fp16_0, tensor var_5836_cast_fp16_1 = split(axis = var_5836_axis_0, split_sizes = var_5836_split_sizes_0, x = embed_83_cast_fp16)[name = string("op_5836_cast_fp16")]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5838_cast_fp16 = mul(x = var_5836_cast_fp16_1, y = const_208_promoted_to_fp16)[name = string("op_5838_cast_fp16")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841_cast_fp16 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838_cast_fp16, var_5836_cast_fp16_0))[name = string("op_5841_cast_fp16")]; tensor var_5842_cast_fp16 = mul(x = var_5841_cast_fp16, y = sin_cast_fp16)[name = string("op_5842_cast_fp16")]; tensor key_states_83_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5842_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([20])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([0])]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([21])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_201, expand_dims_202, position_id, concat_163_values3_0))[name = string("concat_163")]; tensor concat_164_values1_0 = const()[name = string("concat_164_values1_0"), val = tensor([0])]; tensor concat_164_values3_0 = const()[name = string("concat_164_values3_0"), val = tensor([0])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_204, concat_164_values1_0, var_426, concat_164_values3_0))[name = string("concat_164")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_350_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_350")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83_cast_fp16 = transpose(perm = value_states_83_perm_0, x = var_5821_cast_fp16)[name = string("transpose_10")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = value_states_83_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_351_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_351")]; tensor var_5885_begin_0 = const()[name = string("op_5885_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5885_end_0 = const()[name = string("op_5885_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5885_end_mask_0 = const()[name = string("op_5885_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5885_cast_fp16 = slice_by_index(begin = var_5885_begin_0, end = var_5885_end_0, end_mask = var_5885_end_mask_0, x = coreml_update_state_88)[name = string("op_5885_cast_fp16")]; tensor tile_40 = const()[name = string("tile_40"), val = tensor([1, 1])]; int32 var_5888_axis_0 = const()[name = string("op_5888_axis_0"), val = int32(1)]; tensor var_5888_cast_fp16_0, tensor var_5888_cast_fp16_1 = split(axis = var_5888_axis_0, split_sizes = tile_40, x = var_5885_cast_fp16)[name = string("op_5888_cast_fp16")]; tensor var_5895_begin_0 = const()[name = string("op_5895_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5895_end_0 = const()[name = string("op_5895_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5895_end_mask_0 = const()[name = string("op_5895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = coreml_update_state_89)[name = string("op_5895_cast_fp16")]; tensor tile_41 = const()[name = string("tile_41"), val = tensor([1, 1])]; int32 var_5898_axis_0 = const()[name = string("op_5898_axis_0"), val = int32(1)]; tensor var_5898_cast_fp16_0, tensor var_5898_cast_fp16_1 = split(axis = var_5898_axis_0, split_sizes = tile_41, x = var_5895_cast_fp16)[name = string("op_5898_cast_fp16")]; tensor var_5901_split_sizes_0 = const()[name = string("op_5901_split_sizes_0"), val = tensor([8, 8])]; int32 var_5901_axis_0 = const()[name = string("op_5901_axis_0"), val = int32(1)]; tensor var_5901_cast_fp16_0, tensor var_5901_cast_fp16_1 = split(axis = var_5901_axis_0, split_sizes = var_5901_split_sizes_0, x = query_states_83_cast_fp16)[name = string("op_5901_cast_fp16")]; bool attn_weights_321_transpose_x_0 = const()[name = string("attn_weights_321_transpose_x_0"), val = bool(false)]; bool attn_weights_321_transpose_y_0 = const()[name = string("attn_weights_321_transpose_y_0"), val = bool(false)]; tensor attn_weights_321_cast_fp16 = matmul(transpose_x = attn_weights_321_transpose_x_0, transpose_y = attn_weights_321_transpose_y_0, x = var_5888_cast_fp16_0, y = var_5901_cast_fp16_0)[name = string("attn_weights_321_cast_fp16")]; fp16 _inversed_attn_weights_323_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_323_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_323_cast_fp16 = mul(x = attn_weights_321_cast_fp16, y = _inversed_attn_weights_323_y_0_to_fp16)[name = string("_inversed_attn_weights_323_cast_fp16")]; tensor attn_weights_325_cast_fp16 = add(x = _inversed_attn_weights_323_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_325_cast_fp16")]; int32 var_5908 = const()[name = string("op_5908"), val = int32(2)]; tensor attn_weights_327_cast_fp16 = softmax(axis = var_5908, x = attn_weights_325_cast_fp16)[name = string("attn_weights_327_cast_fp16")]; bool var_5914_transpose_x_1 = const()[name = string("op_5914_transpose_x_1"), val = bool(true)]; bool var_5914_transpose_y_1 = const()[name = string("op_5914_transpose_y_1"), val = bool(false)]; tensor var_5914_cast_fp16 = matmul(transpose_x = var_5914_transpose_x_1, transpose_y = var_5914_transpose_y_1, x = attn_weights_327_cast_fp16, y = var_5898_cast_fp16_0)[name = string("op_5914_cast_fp16")]; bool attn_weights_329_transpose_x_0 = const()[name = string("attn_weights_329_transpose_x_0"), val = bool(false)]; bool attn_weights_329_transpose_y_0 = const()[name = string("attn_weights_329_transpose_y_0"), val = bool(false)]; tensor attn_weights_329_cast_fp16 = matmul(transpose_x = attn_weights_329_transpose_x_0, transpose_y = attn_weights_329_transpose_y_0, x = var_5888_cast_fp16_1, y = var_5901_cast_fp16_1)[name = string("attn_weights_329_cast_fp16")]; fp16 _inversed_attn_weights_331_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_331_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_331_cast_fp16 = mul(x = attn_weights_329_cast_fp16, y = _inversed_attn_weights_331_y_0_to_fp16)[name = string("_inversed_attn_weights_331_cast_fp16")]; tensor attn_weights_333_cast_fp16 = add(x = _inversed_attn_weights_331_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_333_cast_fp16")]; int32 var_5920 = const()[name = string("op_5920"), val = int32(2)]; tensor attn_weights_335_cast_fp16 = softmax(axis = var_5920, x = attn_weights_333_cast_fp16)[name = string("attn_weights_335_cast_fp16")]; bool attn_output_121_transpose_x_1 = const()[name = string("attn_output_121_transpose_x_1"), val = bool(true)]; bool attn_output_121_transpose_y_1 = const()[name = string("attn_output_121_transpose_y_1"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_1, transpose_y = attn_output_121_transpose_y_1, x = attn_weights_335_cast_fp16, y = var_5898_cast_fp16_1)[name = string("attn_output_121_cast_fp16")]; int32 var_5928 = const()[name = string("op_5928"), val = int32(1)]; bool attn_output_123_interleave_0 = const()[name = string("attn_output_123_interleave_0"), val = bool(false)]; tensor attn_output_123_cast_fp16 = concat(axis = var_5928, interleave = attn_output_123_interleave_0, values = (var_5914_cast_fp16, attn_output_121_cast_fp16))[name = string("attn_output_123_cast_fp16")]; tensor var_5932_perm_0 = const()[name = string("op_5932_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5937 = const()[name = string("op_5937"), val = tensor([1, 1024, 1, 32])]; tensor var_5932_cast_fp16 = transpose(perm = var_5932_perm_0, x = attn_output_123_cast_fp16)[name = string("transpose_9")]; tensor x_365_cast_fp16 = reshape(shape = var_5937, x = var_5932_cast_fp16)[name = string("x_365_cast_fp16")]; string hidden_states_123_pad_type_0 = const()[name = string("hidden_states_123_pad_type_0"), val = string("valid")]; tensor hidden_states_123_strides_0 = const()[name = string("hidden_states_123_strides_0"), val = tensor([1, 1])]; tensor hidden_states_123_pad_0 = const()[name = string("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_123_dilations_0 = const()[name = string("hidden_states_123_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_123_groups_0 = const()[name = string("hidden_states_123_groups_0"), val = int32(1)]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608890688)))]; tensor hidden_states_123_cast_fp16 = conv(dilations = hidden_states_123_dilations_0, groups = hidden_states_123_groups_0, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = hidden_states_123_strides_0, weight = var_5944_to_fp16, x = x_365_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_359_cast_fp16, y = hidden_states_123_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(1)]; fp16 const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5959_cast_fp16 = mul(x = x_367_cast_fp16, y = const_213_promoted_to_fp16)[name = string("op_5959_cast_fp16")]; bool x_369_interleave_0 = const()[name = string("x_369_interleave_0"), val = bool(false)]; tensor x_369_cast_fp16 = concat(axis = var_5956, interleave = x_369_interleave_0, values = (x_367_cast_fp16, var_5959_cast_fp16))[name = string("x_369_cast_fp16")]; tensor out_247_axes_0 = const()[name = string("out_247_axes_0"), val = tensor([1])]; fp16 var_5969_to_fp16 = const()[name = string("op_5969_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_247_cast_fp16 = layer_norm(axes = out_247_axes_0, epsilon = var_5969_to_fp16, x = x_369_cast_fp16)[name = string("out_247_cast_fp16")]; tensor layer_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610987904)))]; tensor out_249_cast_fp16 = mul(x = out_247_cast_fp16, y = layer_layers_20_post_attention_layernorm_weight_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5975_split_sizes_0 = const()[name = string("op_5975_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5975_axis_0 = const()[name = string("op_5975_axis_0"), val = int32(1)]; tensor var_5975_cast_fp16_0, tensor var_5975_cast_fp16_1 = split(axis = var_5975_axis_0, split_sizes = var_5975_split_sizes_0, x = out_249_cast_fp16)[name = string("op_5975_cast_fp16")]; string input_41_pad_type_0 = const()[name = string("input_41_pad_type_0"), val = string("valid")]; tensor input_41_strides_0 = const()[name = string("input_41_strides_0"), val = tensor([1, 1])]; tensor input_41_pad_0 = const()[name = string("input_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_41_dilations_0 = const()[name = string("input_41_dilations_0"), val = tensor([1, 1])]; int32 input_41_groups_0 = const()[name = string("input_41_groups_0"), val = int32(1)]; tensor var_5980_to_fp16 = const()[name = string("op_5980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610992064)))]; tensor input_41_cast_fp16 = conv(dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = var_5980_to_fp16, x = var_5975_cast_fp16_0)[name = string("input_41_cast_fp16")]; tensor var_5991_cast_fp16 = silu(x = input_41_cast_fp16)[name = string("op_5991_cast_fp16")]; string var_5996_pad_type_0 = const()[name = string("op_5996_pad_type_0"), val = string("valid")]; tensor var_5996_strides_0 = const()[name = string("op_5996_strides_0"), val = tensor([1, 1])]; tensor var_5996_pad_0 = const()[name = string("op_5996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5996_dilations_0 = const()[name = string("op_5996_dilations_0"), val = tensor([1, 1])]; int32 var_5996_groups_0 = const()[name = string("op_5996_groups_0"), val = int32(1)]; tensor var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619380736)))]; tensor var_5996_cast_fp16 = conv(dilations = var_5996_dilations_0, groups = var_5996_groups_0, pad = var_5996_pad_0, pad_type = var_5996_pad_type_0, strides = var_5996_strides_0, weight = var_5979_to_fp16, x = var_5975_cast_fp16_0)[name = string("op_5996_cast_fp16")]; tensor x_375_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5996_cast_fp16)[name = string("x_375_cast_fp16")]; string hidden_states_125_pad_type_0 = const()[name = string("hidden_states_125_pad_type_0"), val = string("valid")]; tensor hidden_states_125_strides_0 = const()[name = string("hidden_states_125_strides_0"), val = tensor([1, 1])]; tensor hidden_states_125_pad_0 = const()[name = string("hidden_states_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_125_dilations_0 = const()[name = string("hidden_states_125_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_125_groups_0 = const()[name = string("hidden_states_125_groups_0"), val = int32(1)]; tensor var_5978_to_fp16 = const()[name = string("op_5978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627769408)))]; tensor hidden_states_125_cast_fp16 = conv(dilations = hidden_states_125_dilations_0, groups = hidden_states_125_groups_0, pad = hidden_states_125_pad_0, pad_type = hidden_states_125_pad_type_0, strides = hidden_states_125_strides_0, weight = var_5978_to_fp16, x = x_375_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = hidden_states_125_cast_fp16)[name = string("x_377_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6012_cast_fp16 = mul(x = x_377_cast_fp16, y = const_214_promoted_to_fp16)[name = string("op_6012_cast_fp16")]; bool x_379_interleave_0 = const()[name = string("x_379_interleave_0"), val = bool(false)]; tensor x_379_cast_fp16 = concat(axis = var_6009, interleave = x_379_interleave_0, values = (x_377_cast_fp16, var_6012_cast_fp16))[name = string("x_379_cast_fp16")]; tensor out_253_axes_0 = const()[name = string("out_253_axes_0"), val = tensor([1])]; fp16 var_6022_to_fp16 = const()[name = string("op_6022_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_253_cast_fp16 = layer_norm(axes = out_253_axes_0, epsilon = var_6022_to_fp16, x = x_379_cast_fp16)[name = string("out_253_cast_fp16")]; tensor layer_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636158080)))]; tensor out_255_cast_fp16 = mul(x = out_253_cast_fp16, y = layer_layers_21_input_layernorm_weight_to_fp16)[name = string("out_255_cast_fp16")]; tensor var_6028_split_sizes_0 = const()[name = string("op_6028_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6028_axis_0 = const()[name = string("op_6028_axis_0"), val = int32(1)]; tensor var_6028_cast_fp16_0, tensor var_6028_cast_fp16_1 = split(axis = var_6028_axis_0, split_sizes = var_6028_split_sizes_0, x = out_255_cast_fp16)[name = string("op_6028_cast_fp16")]; string query_states_85_pad_type_0 = const()[name = string("query_states_85_pad_type_0"), val = string("valid")]; tensor query_states_85_strides_0 = const()[name = string("query_states_85_strides_0"), val = tensor([1, 1])]; tensor query_states_85_pad_0 = const()[name = string("query_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_85_dilations_0 = const()[name = string("query_states_85_dilations_0"), val = tensor([1, 1])]; int32 query_states_85_groups_0 = const()[name = string("query_states_85_groups_0"), val = int32(1)]; tensor var_6050_to_fp16 = const()[name = string("op_6050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636162240)))]; tensor query_states_85_cast_fp16 = conv(dilations = query_states_85_dilations_0, groups = query_states_85_groups_0, pad = query_states_85_pad_0, pad_type = query_states_85_pad_type_0, strides = query_states_85_strides_0, weight = var_6050_to_fp16, x = var_6028_cast_fp16_0)[name = string("query_states_85_cast_fp16")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor var_6061_to_fp16 = const()[name = string("op_6061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638259456)))]; tensor key_states_85_cast_fp16 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = var_6061_to_fp16, x = var_6028_cast_fp16_0)[name = string("key_states_85_cast_fp16")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor var_6072_to_fp16 = const()[name = string("op_6072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638521664)))]; tensor value_states_85_cast_fp16 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = var_6072_to_fp16, x = var_6028_cast_fp16_0)[name = string("value_states_85_cast_fp16")]; tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 16, 64, 32])]; tensor embed_85_cast_fp16 = reshape(shape = var_6080, x = query_states_85_cast_fp16)[name = string("embed_85_cast_fp16")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([1, 2, 64, 32])]; tensor var_6085_cast_fp16 = reshape(shape = var_6084, x = key_states_85_cast_fp16)[name = string("op_6085_cast_fp16")]; tensor embed_87_perm_0 = const()[name = string("embed_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6091 = const()[name = string("op_6091"), val = tensor([1, 2, 64, 32])]; tensor var_6092_cast_fp16 = reshape(shape = var_6091, x = value_states_85_cast_fp16)[name = string("op_6092_cast_fp16")]; tensor value_states_87_perm_0 = const()[name = string("value_states_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6096_cast_fp16 = mul(x = embed_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6097_split_sizes_0 = const()[name = string("op_6097_split_sizes_0"), val = tensor([32, 32])]; int32 var_6097_axis_0 = const()[name = string("op_6097_axis_0"), val = int32(-2)]; tensor var_6097_cast_fp16_0, tensor var_6097_cast_fp16_1 = split(axis = var_6097_axis_0, split_sizes = var_6097_split_sizes_0, x = embed_85_cast_fp16)[name = string("op_6097_cast_fp16")]; fp16 const_217_promoted_to_fp16 = const()[name = string("const_217_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6099_cast_fp16 = mul(x = var_6097_cast_fp16_1, y = const_217_promoted_to_fp16)[name = string("op_6099_cast_fp16")]; int32 var_6101 = const()[name = string("op_6101"), val = int32(-2)]; bool var_6102_interleave_0 = const()[name = string("op_6102_interleave_0"), val = bool(false)]; tensor var_6102_cast_fp16 = concat(axis = var_6101, interleave = var_6102_interleave_0, values = (var_6099_cast_fp16, var_6097_cast_fp16_0))[name = string("op_6102_cast_fp16")]; tensor var_6103_cast_fp16 = mul(x = var_6102_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6103_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_6096_cast_fp16, y = var_6103_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor embed_87_cast_fp16 = transpose(perm = embed_87_perm_0, x = var_6085_cast_fp16)[name = string("transpose_8")]; tensor var_6106_cast_fp16 = mul(x = embed_87_cast_fp16, y = cos_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6107_split_sizes_0 = const()[name = string("op_6107_split_sizes_0"), val = tensor([32, 32])]; int32 var_6107_axis_0 = const()[name = string("op_6107_axis_0"), val = int32(-1)]; tensor var_6107_cast_fp16_0, tensor var_6107_cast_fp16_1 = split(axis = var_6107_axis_0, split_sizes = var_6107_split_sizes_0, x = embed_87_cast_fp16)[name = string("op_6107_cast_fp16")]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6109_cast_fp16 = mul(x = var_6107_cast_fp16_1, y = const_218_promoted_to_fp16)[name = string("op_6109_cast_fp16")]; int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; bool var_6112_interleave_0 = const()[name = string("op_6112_interleave_0"), val = bool(false)]; tensor var_6112_cast_fp16 = concat(axis = var_6111, interleave = var_6112_interleave_0, values = (var_6109_cast_fp16, var_6107_cast_fp16_0))[name = string("op_6112_cast_fp16")]; tensor var_6113_cast_fp16 = mul(x = var_6112_cast_fp16, y = sin_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor key_states_87_cast_fp16 = add(x = var_6106_cast_fp16, y = var_6113_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([21])]; tensor expand_dims_212 = const()[name = string("expand_dims_212"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([22])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_211, expand_dims_212, position_id, concat_171_values3_0))[name = string("concat_171")]; tensor concat_172_values1_0 = const()[name = string("concat_172_values1_0"), val = tensor([0])]; tensor concat_172_values3_0 = const()[name = string("concat_172_values3_0"), val = tensor([0])]; int32 concat_172_axis_0 = const()[name = string("concat_172_axis_0"), val = int32(0)]; bool concat_172_interleave_0 = const()[name = string("concat_172_interleave_0"), val = bool(false)]; tensor concat_172 = concat(axis = concat_172_axis_0, interleave = concat_172_interleave_0, values = (expand_dims_214, concat_172_values1_0, var_426, concat_172_values3_0))[name = string("concat_172")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = key_states_87_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_352_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_352")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87_cast_fp16 = transpose(perm = value_states_87_perm_0, x = var_6092_cast_fp16)[name = string("transpose_7")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = value_states_87_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_353_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_353")]; tensor var_6156_begin_0 = const()[name = string("op_6156_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6156_end_0 = const()[name = string("op_6156_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6156_end_mask_0 = const()[name = string("op_6156_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6156_cast_fp16 = slice_by_index(begin = var_6156_begin_0, end = var_6156_end_0, end_mask = var_6156_end_mask_0, x = coreml_update_state_90)[name = string("op_6156_cast_fp16")]; tensor tile_42 = const()[name = string("tile_42"), val = tensor([1, 1])]; int32 var_6159_axis_0 = const()[name = string("op_6159_axis_0"), val = int32(1)]; tensor var_6159_cast_fp16_0, tensor var_6159_cast_fp16_1 = split(axis = var_6159_axis_0, split_sizes = tile_42, x = var_6156_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor var_6166_begin_0 = const()[name = string("op_6166_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6166_end_0 = const()[name = string("op_6166_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6166_end_mask_0 = const()[name = string("op_6166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6166_cast_fp16 = slice_by_index(begin = var_6166_begin_0, end = var_6166_end_0, end_mask = var_6166_end_mask_0, x = coreml_update_state_91)[name = string("op_6166_cast_fp16")]; tensor tile_43 = const()[name = string("tile_43"), val = tensor([1, 1])]; int32 var_6169_axis_0 = const()[name = string("op_6169_axis_0"), val = int32(1)]; tensor var_6169_cast_fp16_0, tensor var_6169_cast_fp16_1 = split(axis = var_6169_axis_0, split_sizes = tile_43, x = var_6166_cast_fp16)[name = string("op_6169_cast_fp16")]; tensor var_6172_split_sizes_0 = const()[name = string("op_6172_split_sizes_0"), val = tensor([8, 8])]; int32 var_6172_axis_0 = const()[name = string("op_6172_axis_0"), val = int32(1)]; tensor var_6172_cast_fp16_0, tensor var_6172_cast_fp16_1 = split(axis = var_6172_axis_0, split_sizes = var_6172_split_sizes_0, x = query_states_87_cast_fp16)[name = string("op_6172_cast_fp16")]; bool attn_weights_337_transpose_x_0 = const()[name = string("attn_weights_337_transpose_x_0"), val = bool(false)]; bool attn_weights_337_transpose_y_0 = const()[name = string("attn_weights_337_transpose_y_0"), val = bool(false)]; tensor attn_weights_337_cast_fp16 = matmul(transpose_x = attn_weights_337_transpose_x_0, transpose_y = attn_weights_337_transpose_y_0, x = var_6159_cast_fp16_0, y = var_6172_cast_fp16_0)[name = string("attn_weights_337_cast_fp16")]; fp16 _inversed_attn_weights_339_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_339_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_339_cast_fp16 = mul(x = attn_weights_337_cast_fp16, y = _inversed_attn_weights_339_y_0_to_fp16)[name = string("_inversed_attn_weights_339_cast_fp16")]; tensor attn_weights_341_cast_fp16 = add(x = _inversed_attn_weights_339_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_341_cast_fp16")]; int32 var_6179 = const()[name = string("op_6179"), val = int32(2)]; tensor attn_weights_343_cast_fp16 = softmax(axis = var_6179, x = attn_weights_341_cast_fp16)[name = string("attn_weights_343_cast_fp16")]; bool var_6185_transpose_x_1 = const()[name = string("op_6185_transpose_x_1"), val = bool(true)]; bool var_6185_transpose_y_1 = const()[name = string("op_6185_transpose_y_1"), val = bool(false)]; tensor var_6185_cast_fp16 = matmul(transpose_x = var_6185_transpose_x_1, transpose_y = var_6185_transpose_y_1, x = attn_weights_343_cast_fp16, y = var_6169_cast_fp16_0)[name = string("op_6185_cast_fp16")]; bool attn_weights_345_transpose_x_0 = const()[name = string("attn_weights_345_transpose_x_0"), val = bool(false)]; bool attn_weights_345_transpose_y_0 = const()[name = string("attn_weights_345_transpose_y_0"), val = bool(false)]; tensor attn_weights_345_cast_fp16 = matmul(transpose_x = attn_weights_345_transpose_x_0, transpose_y = attn_weights_345_transpose_y_0, x = var_6159_cast_fp16_1, y = var_6172_cast_fp16_1)[name = string("attn_weights_345_cast_fp16")]; fp16 _inversed_attn_weights_347_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_347_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_347_cast_fp16 = mul(x = attn_weights_345_cast_fp16, y = _inversed_attn_weights_347_y_0_to_fp16)[name = string("_inversed_attn_weights_347_cast_fp16")]; tensor attn_weights_349_cast_fp16 = add(x = _inversed_attn_weights_347_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_349_cast_fp16")]; int32 var_6191 = const()[name = string("op_6191"), val = int32(2)]; tensor attn_weights_351_cast_fp16 = softmax(axis = var_6191, x = attn_weights_349_cast_fp16)[name = string("attn_weights_351_cast_fp16")]; bool attn_output_127_transpose_x_1 = const()[name = string("attn_output_127_transpose_x_1"), val = bool(true)]; bool attn_output_127_transpose_y_1 = const()[name = string("attn_output_127_transpose_y_1"), val = bool(false)]; tensor attn_output_127_cast_fp16 = matmul(transpose_x = attn_output_127_transpose_x_1, transpose_y = attn_output_127_transpose_y_1, x = attn_weights_351_cast_fp16, y = var_6169_cast_fp16_1)[name = string("attn_output_127_cast_fp16")]; int32 var_6199 = const()[name = string("op_6199"), val = int32(1)]; bool attn_output_129_interleave_0 = const()[name = string("attn_output_129_interleave_0"), val = bool(false)]; tensor attn_output_129_cast_fp16 = concat(axis = var_6199, interleave = attn_output_129_interleave_0, values = (var_6185_cast_fp16, attn_output_127_cast_fp16))[name = string("attn_output_129_cast_fp16")]; tensor var_6203_perm_0 = const()[name = string("op_6203_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 1024, 1, 32])]; tensor var_6203_cast_fp16 = transpose(perm = var_6203_perm_0, x = attn_output_129_cast_fp16)[name = string("transpose_6")]; tensor x_383_cast_fp16 = reshape(shape = var_6208, x = var_6203_cast_fp16)[name = string("x_383_cast_fp16")]; string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638783872)))]; tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = var_6215_to_fp16, x = x_383_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("x_385_cast_fp16")]; int32 var_6227 = const()[name = string("op_6227"), val = int32(1)]; fp16 const_223_promoted_to_fp16 = const()[name = string("const_223_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6230_cast_fp16 = mul(x = x_385_cast_fp16, y = const_223_promoted_to_fp16)[name = string("op_6230_cast_fp16")]; bool x_387_interleave_0 = const()[name = string("x_387_interleave_0"), val = bool(false)]; tensor x_387_cast_fp16 = concat(axis = var_6227, interleave = x_387_interleave_0, values = (x_385_cast_fp16, var_6230_cast_fp16))[name = string("x_387_cast_fp16")]; tensor out_259_axes_0 = const()[name = string("out_259_axes_0"), val = tensor([1])]; fp16 var_6240_to_fp16 = const()[name = string("op_6240_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_259_cast_fp16 = layer_norm(axes = out_259_axes_0, epsilon = var_6240_to_fp16, x = x_387_cast_fp16)[name = string("out_259_cast_fp16")]; tensor layer_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640881088)))]; tensor out_261_cast_fp16 = mul(x = out_259_cast_fp16, y = layer_layers_21_post_attention_layernorm_weight_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_6246_split_sizes_0 = const()[name = string("op_6246_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6246_axis_0 = const()[name = string("op_6246_axis_0"), val = int32(1)]; tensor var_6246_cast_fp16_0, tensor var_6246_cast_fp16_1 = split(axis = var_6246_axis_0, split_sizes = var_6246_split_sizes_0, x = out_261_cast_fp16)[name = string("op_6246_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640885248)))]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = var_6251_to_fp16, x = var_6246_cast_fp16_0)[name = string("input_43_cast_fp16")]; tensor var_6262_cast_fp16 = silu(x = input_43_cast_fp16)[name = string("op_6262_cast_fp16")]; string var_6267_pad_type_0 = const()[name = string("op_6267_pad_type_0"), val = string("valid")]; tensor var_6267_strides_0 = const()[name = string("op_6267_strides_0"), val = tensor([1, 1])]; tensor var_6267_pad_0 = const()[name = string("op_6267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6267_dilations_0 = const()[name = string("op_6267_dilations_0"), val = tensor([1, 1])]; int32 var_6267_groups_0 = const()[name = string("op_6267_groups_0"), val = int32(1)]; tensor var_6250_to_fp16 = const()[name = string("op_6250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649273920)))]; tensor var_6267_cast_fp16 = conv(dilations = var_6267_dilations_0, groups = var_6267_groups_0, pad = var_6267_pad_0, pad_type = var_6267_pad_type_0, strides = var_6267_strides_0, weight = var_6250_to_fp16, x = var_6246_cast_fp16_0)[name = string("op_6267_cast_fp16")]; tensor x_393_cast_fp16 = mul(x = var_6262_cast_fp16, y = var_6267_cast_fp16)[name = string("x_393_cast_fp16")]; string hidden_states_131_pad_type_0 = const()[name = string("hidden_states_131_pad_type_0"), val = string("valid")]; tensor hidden_states_131_strides_0 = const()[name = string("hidden_states_131_strides_0"), val = tensor([1, 1])]; tensor hidden_states_131_pad_0 = const()[name = string("hidden_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_131_dilations_0 = const()[name = string("hidden_states_131_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_131_groups_0 = const()[name = string("hidden_states_131_groups_0"), val = int32(1)]; tensor var_6249_to_fp16 = const()[name = string("op_6249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657662592)))]; tensor hidden_states_131_cast_fp16 = conv(dilations = hidden_states_131_dilations_0, groups = hidden_states_131_groups_0, pad = hidden_states_131_pad_0, pad_type = hidden_states_131_pad_type_0, strides = hidden_states_131_strides_0, weight = var_6249_to_fp16, x = x_393_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor x_395_cast_fp16 = add(x = x_385_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("x_395_cast_fp16")]; int32 var_6280 = const()[name = string("op_6280"), val = int32(1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6283_cast_fp16 = mul(x = x_395_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_6283_cast_fp16")]; bool x_397_interleave_0 = const()[name = string("x_397_interleave_0"), val = bool(false)]; tensor x_397_cast_fp16 = concat(axis = var_6280, interleave = x_397_interleave_0, values = (x_395_cast_fp16, var_6283_cast_fp16))[name = string("x_397_cast_fp16")]; tensor out_265_axes_0 = const()[name = string("out_265_axes_0"), val = tensor([1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_265_cast_fp16 = layer_norm(axes = out_265_axes_0, epsilon = var_6293_to_fp16, x = x_397_cast_fp16)[name = string("out_265_cast_fp16")]; tensor layer_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666051264)))]; tensor out_267_cast_fp16 = mul(x = out_265_cast_fp16, y = layer_layers_22_input_layernorm_weight_to_fp16)[name = string("out_267_cast_fp16")]; tensor var_6299_split_sizes_0 = const()[name = string("op_6299_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6299_axis_0 = const()[name = string("op_6299_axis_0"), val = int32(1)]; tensor var_6299_cast_fp16_0, tensor var_6299_cast_fp16_1 = split(axis = var_6299_axis_0, split_sizes = var_6299_split_sizes_0, x = out_267_cast_fp16)[name = string("op_6299_cast_fp16")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor var_6321_to_fp16 = const()[name = string("op_6321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666055424)))]; tensor query_states_89_cast_fp16 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = var_6321_to_fp16, x = var_6299_cast_fp16_0)[name = string("query_states_89_cast_fp16")]; string key_states_89_pad_type_0 = const()[name = string("key_states_89_pad_type_0"), val = string("valid")]; tensor key_states_89_strides_0 = const()[name = string("key_states_89_strides_0"), val = tensor([1, 1])]; tensor key_states_89_pad_0 = const()[name = string("key_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_89_dilations_0 = const()[name = string("key_states_89_dilations_0"), val = tensor([1, 1])]; int32 key_states_89_groups_0 = const()[name = string("key_states_89_groups_0"), val = int32(1)]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668152640)))]; tensor key_states_89_cast_fp16 = conv(dilations = key_states_89_dilations_0, groups = key_states_89_groups_0, pad = key_states_89_pad_0, pad_type = key_states_89_pad_type_0, strides = key_states_89_strides_0, weight = var_6332_to_fp16, x = var_6299_cast_fp16_0)[name = string("key_states_89_cast_fp16")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668414848)))]; tensor value_states_89_cast_fp16 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = var_6343_to_fp16, x = var_6299_cast_fp16_0)[name = string("value_states_89_cast_fp16")]; tensor var_6351 = const()[name = string("op_6351"), val = tensor([1, 16, 64, 32])]; tensor embed_89_cast_fp16 = reshape(shape = var_6351, x = query_states_89_cast_fp16)[name = string("embed_89_cast_fp16")]; tensor var_6355 = const()[name = string("op_6355"), val = tensor([1, 2, 64, 32])]; tensor var_6356_cast_fp16 = reshape(shape = var_6355, x = key_states_89_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor embed_91_perm_0 = const()[name = string("embed_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6362 = const()[name = string("op_6362"), val = tensor([1, 2, 64, 32])]; tensor var_6363_cast_fp16 = reshape(shape = var_6362, x = value_states_89_cast_fp16)[name = string("op_6363_cast_fp16")]; tensor value_states_91_perm_0 = const()[name = string("value_states_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6367_cast_fp16 = mul(x = embed_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor var_6368_split_sizes_0 = const()[name = string("op_6368_split_sizes_0"), val = tensor([32, 32])]; int32 var_6368_axis_0 = const()[name = string("op_6368_axis_0"), val = int32(-2)]; tensor var_6368_cast_fp16_0, tensor var_6368_cast_fp16_1 = split(axis = var_6368_axis_0, split_sizes = var_6368_split_sizes_0, x = embed_89_cast_fp16)[name = string("op_6368_cast_fp16")]; fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6370_cast_fp16 = mul(x = var_6368_cast_fp16_1, y = const_227_promoted_to_fp16)[name = string("op_6370_cast_fp16")]; int32 var_6372 = const()[name = string("op_6372"), val = int32(-2)]; bool var_6373_interleave_0 = const()[name = string("op_6373_interleave_0"), val = bool(false)]; tensor var_6373_cast_fp16 = concat(axis = var_6372, interleave = var_6373_interleave_0, values = (var_6370_cast_fp16, var_6368_cast_fp16_0))[name = string("op_6373_cast_fp16")]; tensor var_6374_cast_fp16 = mul(x = var_6373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6374_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_6367_cast_fp16, y = var_6374_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor embed_91_cast_fp16 = transpose(perm = embed_91_perm_0, x = var_6356_cast_fp16)[name = string("transpose_5")]; tensor var_6377_cast_fp16 = mul(x = embed_91_cast_fp16, y = cos_cast_fp16)[name = string("op_6377_cast_fp16")]; tensor var_6378_split_sizes_0 = const()[name = string("op_6378_split_sizes_0"), val = tensor([32, 32])]; int32 var_6378_axis_0 = const()[name = string("op_6378_axis_0"), val = int32(-1)]; tensor var_6378_cast_fp16_0, tensor var_6378_cast_fp16_1 = split(axis = var_6378_axis_0, split_sizes = var_6378_split_sizes_0, x = embed_91_cast_fp16)[name = string("op_6378_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6380_cast_fp16 = mul(x = var_6378_cast_fp16_1, y = const_228_promoted_to_fp16)[name = string("op_6380_cast_fp16")]; int32 var_6382 = const()[name = string("op_6382"), val = int32(-1)]; bool var_6383_interleave_0 = const()[name = string("op_6383_interleave_0"), val = bool(false)]; tensor var_6383_cast_fp16 = concat(axis = var_6382, interleave = var_6383_interleave_0, values = (var_6380_cast_fp16, var_6378_cast_fp16_0))[name = string("op_6383_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6383_cast_fp16, y = sin_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor key_states_91_cast_fp16 = add(x = var_6377_cast_fp16, y = var_6384_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor expand_dims_221 = const()[name = string("expand_dims_221"), val = tensor([22])]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([0])]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([23])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_221, expand_dims_222, position_id, concat_179_values3_0))[name = string("concat_179")]; tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (expand_dims_224, concat_180_values1_0, var_426, concat_180_values3_0))[name = string("concat_180")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = key_states_91_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_354_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_354")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91_cast_fp16 = transpose(perm = value_states_91_perm_0, x = var_6363_cast_fp16)[name = string("transpose_4")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = value_states_91_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_355_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_355")]; tensor var_6427_begin_0 = const()[name = string("op_6427_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6427_end_0 = const()[name = string("op_6427_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6427_end_mask_0 = const()[name = string("op_6427_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6427_cast_fp16 = slice_by_index(begin = var_6427_begin_0, end = var_6427_end_0, end_mask = var_6427_end_mask_0, x = coreml_update_state_92)[name = string("op_6427_cast_fp16")]; tensor tile_44 = const()[name = string("tile_44"), val = tensor([1, 1])]; int32 var_6430_axis_0 = const()[name = string("op_6430_axis_0"), val = int32(1)]; tensor var_6430_cast_fp16_0, tensor var_6430_cast_fp16_1 = split(axis = var_6430_axis_0, split_sizes = tile_44, x = var_6427_cast_fp16)[name = string("op_6430_cast_fp16")]; tensor var_6437_begin_0 = const()[name = string("op_6437_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6437_end_0 = const()[name = string("op_6437_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6437_end_mask_0 = const()[name = string("op_6437_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = coreml_update_state_93)[name = string("op_6437_cast_fp16")]; tensor tile_45 = const()[name = string("tile_45"), val = tensor([1, 1])]; int32 var_6440_axis_0 = const()[name = string("op_6440_axis_0"), val = int32(1)]; tensor var_6440_cast_fp16_0, tensor var_6440_cast_fp16_1 = split(axis = var_6440_axis_0, split_sizes = tile_45, x = var_6437_cast_fp16)[name = string("op_6440_cast_fp16")]; tensor var_6443_split_sizes_0 = const()[name = string("op_6443_split_sizes_0"), val = tensor([8, 8])]; int32 var_6443_axis_0 = const()[name = string("op_6443_axis_0"), val = int32(1)]; tensor var_6443_cast_fp16_0, tensor var_6443_cast_fp16_1 = split(axis = var_6443_axis_0, split_sizes = var_6443_split_sizes_0, x = query_states_91_cast_fp16)[name = string("op_6443_cast_fp16")]; bool attn_weights_353_transpose_x_0 = const()[name = string("attn_weights_353_transpose_x_0"), val = bool(false)]; bool attn_weights_353_transpose_y_0 = const()[name = string("attn_weights_353_transpose_y_0"), val = bool(false)]; tensor attn_weights_353_cast_fp16 = matmul(transpose_x = attn_weights_353_transpose_x_0, transpose_y = attn_weights_353_transpose_y_0, x = var_6430_cast_fp16_0, y = var_6443_cast_fp16_0)[name = string("attn_weights_353_cast_fp16")]; fp16 _inversed_attn_weights_355_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_355_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_355_cast_fp16 = mul(x = attn_weights_353_cast_fp16, y = _inversed_attn_weights_355_y_0_to_fp16)[name = string("_inversed_attn_weights_355_cast_fp16")]; tensor attn_weights_357_cast_fp16 = add(x = _inversed_attn_weights_355_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_357_cast_fp16")]; int32 var_6450 = const()[name = string("op_6450"), val = int32(2)]; tensor attn_weights_359_cast_fp16 = softmax(axis = var_6450, x = attn_weights_357_cast_fp16)[name = string("attn_weights_359_cast_fp16")]; bool var_6456_transpose_x_1 = const()[name = string("op_6456_transpose_x_1"), val = bool(true)]; bool var_6456_transpose_y_1 = const()[name = string("op_6456_transpose_y_1"), val = bool(false)]; tensor var_6456_cast_fp16 = matmul(transpose_x = var_6456_transpose_x_1, transpose_y = var_6456_transpose_y_1, x = attn_weights_359_cast_fp16, y = var_6440_cast_fp16_0)[name = string("op_6456_cast_fp16")]; bool attn_weights_361_transpose_x_0 = const()[name = string("attn_weights_361_transpose_x_0"), val = bool(false)]; bool attn_weights_361_transpose_y_0 = const()[name = string("attn_weights_361_transpose_y_0"), val = bool(false)]; tensor attn_weights_361_cast_fp16 = matmul(transpose_x = attn_weights_361_transpose_x_0, transpose_y = attn_weights_361_transpose_y_0, x = var_6430_cast_fp16_1, y = var_6443_cast_fp16_1)[name = string("attn_weights_361_cast_fp16")]; fp16 _inversed_attn_weights_363_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_363_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_363_cast_fp16 = mul(x = attn_weights_361_cast_fp16, y = _inversed_attn_weights_363_y_0_to_fp16)[name = string("_inversed_attn_weights_363_cast_fp16")]; tensor attn_weights_365_cast_fp16 = add(x = _inversed_attn_weights_363_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_365_cast_fp16")]; int32 var_6462 = const()[name = string("op_6462"), val = int32(2)]; tensor attn_weights_367_cast_fp16 = softmax(axis = var_6462, x = attn_weights_365_cast_fp16)[name = string("attn_weights_367_cast_fp16")]; bool attn_output_133_transpose_x_1 = const()[name = string("attn_output_133_transpose_x_1"), val = bool(true)]; bool attn_output_133_transpose_y_1 = const()[name = string("attn_output_133_transpose_y_1"), val = bool(false)]; tensor attn_output_133_cast_fp16 = matmul(transpose_x = attn_output_133_transpose_x_1, transpose_y = attn_output_133_transpose_y_1, x = attn_weights_367_cast_fp16, y = var_6440_cast_fp16_1)[name = string("attn_output_133_cast_fp16")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(1)]; bool attn_output_135_interleave_0 = const()[name = string("attn_output_135_interleave_0"), val = bool(false)]; tensor attn_output_135_cast_fp16 = concat(axis = var_6470, interleave = attn_output_135_interleave_0, values = (var_6456_cast_fp16, attn_output_133_cast_fp16))[name = string("attn_output_135_cast_fp16")]; tensor var_6474_perm_0 = const()[name = string("op_6474_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6479 = const()[name = string("op_6479"), val = tensor([1, 1024, 1, 32])]; tensor var_6474_cast_fp16 = transpose(perm = var_6474_perm_0, x = attn_output_135_cast_fp16)[name = string("transpose_3")]; tensor x_401_cast_fp16 = reshape(shape = var_6479, x = var_6474_cast_fp16)[name = string("x_401_cast_fp16")]; string hidden_states_135_pad_type_0 = const()[name = string("hidden_states_135_pad_type_0"), val = string("valid")]; tensor hidden_states_135_strides_0 = const()[name = string("hidden_states_135_strides_0"), val = tensor([1, 1])]; tensor hidden_states_135_pad_0 = const()[name = string("hidden_states_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_135_dilations_0 = const()[name = string("hidden_states_135_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_135_groups_0 = const()[name = string("hidden_states_135_groups_0"), val = int32(1)]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668677056)))]; tensor hidden_states_135_cast_fp16 = conv(dilations = hidden_states_135_dilations_0, groups = hidden_states_135_groups_0, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = hidden_states_135_strides_0, weight = var_6486_to_fp16, x = x_401_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor x_403_cast_fp16 = add(x = x_395_cast_fp16, y = hidden_states_135_cast_fp16)[name = string("x_403_cast_fp16")]; int32 var_6498 = const()[name = string("op_6498"), val = int32(1)]; fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6501_cast_fp16 = mul(x = x_403_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_6501_cast_fp16")]; bool x_405_interleave_0 = const()[name = string("x_405_interleave_0"), val = bool(false)]; tensor x_405_cast_fp16 = concat(axis = var_6498, interleave = x_405_interleave_0, values = (x_403_cast_fp16, var_6501_cast_fp16))[name = string("x_405_cast_fp16")]; tensor out_271_axes_0 = const()[name = string("out_271_axes_0"), val = tensor([1])]; fp16 var_6511_to_fp16 = const()[name = string("op_6511_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_271_cast_fp16 = layer_norm(axes = out_271_axes_0, epsilon = var_6511_to_fp16, x = x_405_cast_fp16)[name = string("out_271_cast_fp16")]; tensor layer_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670774272)))]; tensor out_273_cast_fp16 = mul(x = out_271_cast_fp16, y = layer_layers_22_post_attention_layernorm_weight_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_6517_split_sizes_0 = const()[name = string("op_6517_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6517_axis_0 = const()[name = string("op_6517_axis_0"), val = int32(1)]; tensor var_6517_cast_fp16_0, tensor var_6517_cast_fp16_1 = split(axis = var_6517_axis_0, split_sizes = var_6517_split_sizes_0, x = out_273_cast_fp16)[name = string("op_6517_cast_fp16")]; string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670778432)))]; tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = var_6522_to_fp16, x = var_6517_cast_fp16_0)[name = string("input_45_cast_fp16")]; tensor var_6533_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_6533_cast_fp16")]; string var_6538_pad_type_0 = const()[name = string("op_6538_pad_type_0"), val = string("valid")]; tensor var_6538_strides_0 = const()[name = string("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = string("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = string("op_6538_dilations_0"), val = tensor([1, 1])]; int32 var_6538_groups_0 = const()[name = string("op_6538_groups_0"), val = int32(1)]; tensor var_6521_to_fp16 = const()[name = string("op_6521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679167104)))]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = var_6521_to_fp16, x = var_6517_cast_fp16_0)[name = string("op_6538_cast_fp16")]; tensor x_411_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6538_cast_fp16)[name = string("x_411_cast_fp16")]; string hidden_states_137_pad_type_0 = const()[name = string("hidden_states_137_pad_type_0"), val = string("valid")]; tensor hidden_states_137_strides_0 = const()[name = string("hidden_states_137_strides_0"), val = tensor([1, 1])]; tensor hidden_states_137_pad_0 = const()[name = string("hidden_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_137_dilations_0 = const()[name = string("hidden_states_137_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_137_groups_0 = const()[name = string("hidden_states_137_groups_0"), val = int32(1)]; tensor var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687555776)))]; tensor hidden_states_137_cast_fp16 = conv(dilations = hidden_states_137_dilations_0, groups = hidden_states_137_groups_0, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = hidden_states_137_strides_0, weight = var_6520_to_fp16, x = x_411_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor x_413_cast_fp16 = add(x = x_403_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("x_413_cast_fp16")]; int32 var_6551 = const()[name = string("op_6551"), val = int32(1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6554_cast_fp16 = mul(x = x_413_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; bool x_415_interleave_0 = const()[name = string("x_415_interleave_0"), val = bool(false)]; tensor x_415_cast_fp16 = concat(axis = var_6551, interleave = x_415_interleave_0, values = (x_413_cast_fp16, var_6554_cast_fp16))[name = string("x_415_cast_fp16")]; tensor out_277_axes_0 = const()[name = string("out_277_axes_0"), val = tensor([1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_277_cast_fp16 = layer_norm(axes = out_277_axes_0, epsilon = var_6564_to_fp16, x = x_415_cast_fp16)[name = string("out_277_cast_fp16")]; tensor layer_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695944448)))]; tensor out_279_cast_fp16 = mul(x = out_277_cast_fp16, y = layer_layers_23_input_layernorm_weight_to_fp16)[name = string("out_279_cast_fp16")]; tensor var_6570_split_sizes_0 = const()[name = string("op_6570_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6570_axis_0 = const()[name = string("op_6570_axis_0"), val = int32(1)]; tensor var_6570_cast_fp16_0, tensor var_6570_cast_fp16_1 = split(axis = var_6570_axis_0, split_sizes = var_6570_split_sizes_0, x = out_279_cast_fp16)[name = string("op_6570_cast_fp16")]; string query_states_93_pad_type_0 = const()[name = string("query_states_93_pad_type_0"), val = string("valid")]; tensor query_states_93_strides_0 = const()[name = string("query_states_93_strides_0"), val = tensor([1, 1])]; tensor query_states_93_pad_0 = const()[name = string("query_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_93_dilations_0 = const()[name = string("query_states_93_dilations_0"), val = tensor([1, 1])]; int32 query_states_93_groups_0 = const()[name = string("query_states_93_groups_0"), val = int32(1)]; tensor var_6592_to_fp16 = const()[name = string("op_6592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695948608)))]; tensor query_states_93_cast_fp16 = conv(dilations = query_states_93_dilations_0, groups = query_states_93_groups_0, pad = query_states_93_pad_0, pad_type = query_states_93_pad_type_0, strides = query_states_93_strides_0, weight = var_6592_to_fp16, x = var_6570_cast_fp16_0)[name = string("query_states_93_cast_fp16")]; string key_states_93_pad_type_0 = const()[name = string("key_states_93_pad_type_0"), val = string("valid")]; tensor key_states_93_strides_0 = const()[name = string("key_states_93_strides_0"), val = tensor([1, 1])]; tensor key_states_93_pad_0 = const()[name = string("key_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_93_dilations_0 = const()[name = string("key_states_93_dilations_0"), val = tensor([1, 1])]; int32 key_states_93_groups_0 = const()[name = string("key_states_93_groups_0"), val = int32(1)]; tensor var_6603_to_fp16 = const()[name = string("op_6603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698045824)))]; tensor key_states_93_cast_fp16 = conv(dilations = key_states_93_dilations_0, groups = key_states_93_groups_0, pad = key_states_93_pad_0, pad_type = key_states_93_pad_type_0, strides = key_states_93_strides_0, weight = var_6603_to_fp16, x = var_6570_cast_fp16_0)[name = string("key_states_93_cast_fp16")]; string value_states_93_pad_type_0 = const()[name = string("value_states_93_pad_type_0"), val = string("valid")]; tensor value_states_93_strides_0 = const()[name = string("value_states_93_strides_0"), val = tensor([1, 1])]; tensor value_states_93_pad_0 = const()[name = string("value_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_93_dilations_0 = const()[name = string("value_states_93_dilations_0"), val = tensor([1, 1])]; int32 value_states_93_groups_0 = const()[name = string("value_states_93_groups_0"), val = int32(1)]; tensor var_6614_to_fp16 = const()[name = string("op_6614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698308032)))]; tensor value_states_93_cast_fp16 = conv(dilations = value_states_93_dilations_0, groups = value_states_93_groups_0, pad = value_states_93_pad_0, pad_type = value_states_93_pad_type_0, strides = value_states_93_strides_0, weight = var_6614_to_fp16, x = var_6570_cast_fp16_0)[name = string("value_states_93_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([1, 16, 64, 32])]; tensor embed_93_cast_fp16 = reshape(shape = var_6622, x = query_states_93_cast_fp16)[name = string("embed_93_cast_fp16")]; tensor var_6626 = const()[name = string("op_6626"), val = tensor([1, 2, 64, 32])]; tensor var_6627_cast_fp16 = reshape(shape = var_6626, x = key_states_93_cast_fp16)[name = string("op_6627_cast_fp16")]; tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([1, 2, 64, 32])]; tensor var_6634_cast_fp16 = reshape(shape = var_6633, x = value_states_93_cast_fp16)[name = string("op_6634_cast_fp16")]; tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6638_cast_fp16 = mul(x = embed_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6638_cast_fp16")]; tensor var_6639_split_sizes_0 = const()[name = string("op_6639_split_sizes_0"), val = tensor([32, 32])]; int32 var_6639_axis_0 = const()[name = string("op_6639_axis_0"), val = int32(-2)]; tensor var_6639_cast_fp16_0, tensor var_6639_cast_fp16_1 = split(axis = var_6639_axis_0, split_sizes = var_6639_split_sizes_0, x = embed_93_cast_fp16)[name = string("op_6639_cast_fp16")]; fp16 const_237_promoted_to_fp16 = const()[name = string("const_237_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6641_cast_fp16 = mul(x = var_6639_cast_fp16_1, y = const_237_promoted_to_fp16)[name = string("op_6641_cast_fp16")]; int32 var_6643 = const()[name = string("op_6643"), val = int32(-2)]; bool var_6644_interleave_0 = const()[name = string("op_6644_interleave_0"), val = bool(false)]; tensor var_6644_cast_fp16 = concat(axis = var_6643, interleave = var_6644_interleave_0, values = (var_6641_cast_fp16, var_6639_cast_fp16_0))[name = string("op_6644_cast_fp16")]; tensor var_6645_cast_fp16 = mul(x = var_6644_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6645_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_6638_cast_fp16, y = var_6645_cast_fp16)[name = string("query_states_cast_fp16")]; tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_6627_cast_fp16)[name = string("transpose_2")]; tensor var_6648_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_6648_cast_fp16")]; tensor var_6649_split_sizes_0 = const()[name = string("op_6649_split_sizes_0"), val = tensor([32, 32])]; int32 var_6649_axis_0 = const()[name = string("op_6649_axis_0"), val = int32(-1)]; tensor var_6649_cast_fp16_0, tensor var_6649_cast_fp16_1 = split(axis = var_6649_axis_0, split_sizes = var_6649_split_sizes_0, x = embed_cast_fp16)[name = string("op_6649_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6651_cast_fp16 = mul(x = var_6649_cast_fp16_1, y = const_238_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; int32 var_6653 = const()[name = string("op_6653"), val = int32(-1)]; bool var_6654_interleave_0 = const()[name = string("op_6654_interleave_0"), val = bool(false)]; tensor var_6654_cast_fp16 = concat(axis = var_6653, interleave = var_6654_interleave_0, values = (var_6651_cast_fp16, var_6649_cast_fp16_0))[name = string("op_6654_cast_fp16")]; tensor var_6655_cast_fp16 = mul(x = var_6654_cast_fp16, y = sin_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor key_states_cast_fp16 = add(x = var_6648_cast_fp16, y = var_6655_cast_fp16)[name = string("key_states_cast_fp16")]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([23])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([0])]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([24])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_231, expand_dims_232, position_id, concat_187_values3_0))[name = string("concat_187")]; tensor concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = tensor([0])]; tensor concat_188_values3_0 = const()[name = string("concat_188_values3_0"), val = tensor([0])]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (expand_dims_234, concat_188_values1_0, var_426, concat_188_values3_0))[name = string("concat_188")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = key_states_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_356_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_356")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_6634_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = value_states_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_357_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_357")]; tensor var_6698_begin_0 = const()[name = string("op_6698_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6698_end_0 = const()[name = string("op_6698_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6698_end_mask_0 = const()[name = string("op_6698_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6698_cast_fp16 = slice_by_index(begin = var_6698_begin_0, end = var_6698_end_0, end_mask = var_6698_end_mask_0, x = coreml_update_state_94)[name = string("op_6698_cast_fp16")]; tensor tile_46 = const()[name = string("tile_46"), val = tensor([1, 1])]; int32 var_6701_axis_0 = const()[name = string("op_6701_axis_0"), val = int32(1)]; tensor var_6701_cast_fp16_0, tensor var_6701_cast_fp16_1 = split(axis = var_6701_axis_0, split_sizes = tile_46, x = var_6698_cast_fp16)[name = string("op_6701_cast_fp16")]; tensor var_6708_begin_0 = const()[name = string("op_6708_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6708_end_0 = const()[name = string("op_6708_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6708_end_mask_0 = const()[name = string("op_6708_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6708_cast_fp16 = slice_by_index(begin = var_6708_begin_0, end = var_6708_end_0, end_mask = var_6708_end_mask_0, x = coreml_update_state_95)[name = string("op_6708_cast_fp16")]; tensor tile_47 = const()[name = string("tile_47"), val = tensor([1, 1])]; int32 var_6711_axis_0 = const()[name = string("op_6711_axis_0"), val = int32(1)]; tensor var_6711_cast_fp16_0, tensor var_6711_cast_fp16_1 = split(axis = var_6711_axis_0, split_sizes = tile_47, x = var_6708_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor var_6714_split_sizes_0 = const()[name = string("op_6714_split_sizes_0"), val = tensor([8, 8])]; int32 var_6714_axis_0 = const()[name = string("op_6714_axis_0"), val = int32(1)]; tensor var_6714_cast_fp16_0, tensor var_6714_cast_fp16_1 = split(axis = var_6714_axis_0, split_sizes = var_6714_split_sizes_0, x = query_states_cast_fp16)[name = string("op_6714_cast_fp16")]; bool attn_weights_369_transpose_x_0 = const()[name = string("attn_weights_369_transpose_x_0"), val = bool(false)]; bool attn_weights_369_transpose_y_0 = const()[name = string("attn_weights_369_transpose_y_0"), val = bool(false)]; tensor attn_weights_369_cast_fp16 = matmul(transpose_x = attn_weights_369_transpose_x_0, transpose_y = attn_weights_369_transpose_y_0, x = var_6701_cast_fp16_0, y = var_6714_cast_fp16_0)[name = string("attn_weights_369_cast_fp16")]; fp16 _inversed_attn_weights_371_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_371_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_371_cast_fp16 = mul(x = attn_weights_369_cast_fp16, y = _inversed_attn_weights_371_y_0_to_fp16)[name = string("_inversed_attn_weights_371_cast_fp16")]; tensor attn_weights_373_cast_fp16 = add(x = _inversed_attn_weights_371_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_373_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(2)]; tensor attn_weights_375_cast_fp16 = softmax(axis = var_6721, x = attn_weights_373_cast_fp16)[name = string("attn_weights_375_cast_fp16")]; bool var_6727_transpose_x_1 = const()[name = string("op_6727_transpose_x_1"), val = bool(true)]; bool var_6727_transpose_y_1 = const()[name = string("op_6727_transpose_y_1"), val = bool(false)]; tensor var_6727_cast_fp16 = matmul(transpose_x = var_6727_transpose_x_1, transpose_y = var_6727_transpose_y_1, x = attn_weights_375_cast_fp16, y = var_6711_cast_fp16_0)[name = string("op_6727_cast_fp16")]; bool attn_weights_377_transpose_x_0 = const()[name = string("attn_weights_377_transpose_x_0"), val = bool(false)]; bool attn_weights_377_transpose_y_0 = const()[name = string("attn_weights_377_transpose_y_0"), val = bool(false)]; tensor attn_weights_377_cast_fp16 = matmul(transpose_x = attn_weights_377_transpose_x_0, transpose_y = attn_weights_377_transpose_y_0, x = var_6701_cast_fp16_1, y = var_6714_cast_fp16_1)[name = string("attn_weights_377_cast_fp16")]; fp16 _inversed_attn_weights_379_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_379_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_379_cast_fp16 = mul(x = attn_weights_377_cast_fp16, y = _inversed_attn_weights_379_y_0_to_fp16)[name = string("_inversed_attn_weights_379_cast_fp16")]; tensor attn_weights_381_cast_fp16 = add(x = _inversed_attn_weights_379_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_381_cast_fp16")]; int32 var_6733 = const()[name = string("op_6733"), val = int32(2)]; tensor attn_weights_cast_fp16 = softmax(axis = var_6733, x = attn_weights_381_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_139_transpose_x_1 = const()[name = string("attn_output_139_transpose_x_1"), val = bool(true)]; bool attn_output_139_transpose_y_1 = const()[name = string("attn_output_139_transpose_y_1"), val = bool(false)]; tensor attn_output_139_cast_fp16 = matmul(transpose_x = attn_output_139_transpose_x_1, transpose_y = attn_output_139_transpose_y_1, x = attn_weights_cast_fp16, y = var_6711_cast_fp16_1)[name = string("attn_output_139_cast_fp16")]; int32 var_6741 = const()[name = string("op_6741"), val = int32(1)]; bool attn_output_141_interleave_0 = const()[name = string("attn_output_141_interleave_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = concat(axis = var_6741, interleave = attn_output_141_interleave_0, values = (var_6727_cast_fp16, attn_output_139_cast_fp16))[name = string("attn_output_141_cast_fp16")]; tensor var_6745_perm_0 = const()[name = string("op_6745_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 1024, 1, 32])]; tensor var_6745_cast_fp16 = transpose(perm = var_6745_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_0")]; tensor x_419_cast_fp16 = reshape(shape = var_6750, x = var_6745_cast_fp16)[name = string("x_419_cast_fp16")]; string hidden_states_141_pad_type_0 = const()[name = string("hidden_states_141_pad_type_0"), val = string("valid")]; tensor hidden_states_141_strides_0 = const()[name = string("hidden_states_141_strides_0"), val = tensor([1, 1])]; tensor hidden_states_141_pad_0 = const()[name = string("hidden_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_141_dilations_0 = const()[name = string("hidden_states_141_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_141_groups_0 = const()[name = string("hidden_states_141_groups_0"), val = int32(1)]; tensor var_6757_to_fp16 = const()[name = string("op_6757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698570240)))]; tensor hidden_states_141_cast_fp16 = conv(dilations = hidden_states_141_dilations_0, groups = hidden_states_141_groups_0, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = hidden_states_141_strides_0, weight = var_6757_to_fp16, x = x_419_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor x_421_cast_fp16 = add(x = x_413_cast_fp16, y = hidden_states_141_cast_fp16)[name = string("x_421_cast_fp16")]; int32 var_6769 = const()[name = string("op_6769"), val = int32(1)]; fp16 const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6772_cast_fp16 = mul(x = x_421_cast_fp16, y = const_243_promoted_to_fp16)[name = string("op_6772_cast_fp16")]; bool x_423_interleave_0 = const()[name = string("x_423_interleave_0"), val = bool(false)]; tensor x_423_cast_fp16 = concat(axis = var_6769, interleave = x_423_interleave_0, values = (x_421_cast_fp16, var_6772_cast_fp16))[name = string("x_423_cast_fp16")]; tensor out_283_axes_0 = const()[name = string("out_283_axes_0"), val = tensor([1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_283_cast_fp16 = layer_norm(axes = out_283_axes_0, epsilon = var_6782_to_fp16, x = x_423_cast_fp16)[name = string("out_283_cast_fp16")]; tensor layer_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700667456)))]; tensor out_285_cast_fp16 = mul(x = out_283_cast_fp16, y = layer_layers_23_post_attention_layernorm_weight_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_6788_split_sizes_0 = const()[name = string("op_6788_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6788_axis_0 = const()[name = string("op_6788_axis_0"), val = int32(1)]; tensor var_6788_cast_fp16_0, tensor var_6788_cast_fp16_1 = split(axis = var_6788_axis_0, split_sizes = var_6788_split_sizes_0, x = out_285_cast_fp16)[name = string("op_6788_cast_fp16")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor var_6793_to_fp16 = const()[name = string("op_6793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700671616)))]; tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_6793_to_fp16, x = var_6788_cast_fp16_0)[name = string("input_cast_fp16")]; tensor var_6804_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_6804_cast_fp16")]; string var_6809_pad_type_0 = const()[name = string("op_6809_pad_type_0"), val = string("valid")]; tensor var_6809_strides_0 = const()[name = string("op_6809_strides_0"), val = tensor([1, 1])]; tensor var_6809_pad_0 = const()[name = string("op_6809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6809_dilations_0 = const()[name = string("op_6809_dilations_0"), val = tensor([1, 1])]; int32 var_6809_groups_0 = const()[name = string("op_6809_groups_0"), val = int32(1)]; tensor var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709060288)))]; tensor var_6809_cast_fp16 = conv(dilations = var_6809_dilations_0, groups = var_6809_groups_0, pad = var_6809_pad_0, pad_type = var_6809_pad_type_0, strides = var_6809_strides_0, weight = var_6792_to_fp16, x = var_6788_cast_fp16_0)[name = string("op_6809_cast_fp16")]; tensor x_429_cast_fp16 = mul(x = var_6804_cast_fp16, y = var_6809_cast_fp16)[name = string("x_429_cast_fp16")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor var_6791_to_fp16 = const()[name = string("op_6791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717448960)))]; tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_6791_to_fp16, x = x_429_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor x_431_cast_fp16 = add(x = x_421_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_431_cast_fp16")]; int32 var_6822 = const()[name = string("op_6822"), val = int32(1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6825_cast_fp16 = mul(x = x_431_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_6825_cast_fp16")]; bool x_433_interleave_0 = const()[name = string("x_433_interleave_0"), val = bool(false)]; tensor x_433_cast_fp16 = concat(axis = var_6822, interleave = x_433_interleave_0, values = (x_431_cast_fp16, var_6825_cast_fp16))[name = string("x_433_cast_fp16")]; tensor out_289_axes_0 = const()[name = string("out_289_axes_0"), val = tensor([1])]; fp16 var_6835_to_fp16 = const()[name = string("op_6835_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_289_cast_fp16 = layer_norm(axes = out_289_axes_0, epsilon = var_6835_to_fp16, x = x_433_cast_fp16)[name = string("out_289_cast_fp16")]; tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725837632)))]; tensor out_291_cast_fp16 = mul(x = out_289_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_291_cast_fp16")]; tensor var_6841_split_sizes_0 = const()[name = string("op_6841_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6841_axis_0 = const()[name = string("op_6841_axis_0"), val = int32(1)]; tensor output, tensor var_6841_cast_fp16_1 = split(axis = var_6841_axis_0, split_sizes = var_6841_split_sizes_0, x = out_291_cast_fp16)[name = string("op_6841_cast_fp16")]; } -> (output); func length_64(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { tensor var_260 = const()[name = string("op_260"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726596608)))]; tensor position_ids_1 = add(x = var_260, y = position_id)[name = string("position_ids_1")]; int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; tensor var_285 = const()[name = string("op_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; tensor var_292_axes_0 = const()[name = string("op_292_axes_0"), val = tensor([2])]; tensor var_292 = expand_dims(axes = var_292_axes_0, x = position_ids_1)[name = string("op_292")]; tensor var_293 = greater(x = var_285, y = var_292)[name = string("op_293")]; tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_293_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_293)[name = string("cast_245")]; tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_293_to_fp16)[name = string("attention_mask_3_cast_fp16")]; fp16 var_301_promoted_to_fp16 = const()[name = string("op_301_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_302_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_301_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_303_after_broadcast_to_fp16 = const()[name = string("op_303_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726596928)))]; tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_303_after_broadcast_to_fp16, cond = var_302_cast_fp16)[name = string("attention_mask_cast_fp16")]; tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; int32 var_318 = const()[name = string("op_318"), val = int32(1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_321_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_321_cast_fp16")]; bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; tensor x_1_cast_fp16 = concat(axis = var_318, interleave = x_1_interleave_0, values = (inputs_embeds, var_321_cast_fp16))[name = string("x_1_cast_fp16")]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_331_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)]; tensor var_337_cast_fp16_0, tensor var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = out_3_cast_fp16)[name = string("op_337_cast_fp16")]; tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([64])]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor var_359_to_fp16 = const()[name = string("op_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_359_to_fp16, x = var_337_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor var_370_to_fp16 = const()[name = string("op_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_370_to_fp16, x = var_337_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_381_to_fp16, x = var_337_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 16, 64, 64])]; tensor embed_1_cast_fp16 = reshape(shape = var_389, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 2, 64, 64])]; tensor var_394_cast_fp16 = reshape(shape = var_393, x = key_states_1_cast_fp16)[name = string("op_394_cast_fp16")]; tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_400 = const()[name = string("op_400"), val = tensor([1, 2, 64, 64])]; tensor var_401_cast_fp16 = reshape(shape = var_400, x = value_states_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_74")]; tensor var_405_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_405_cast_fp16")]; tensor var_406_split_sizes_0 = const()[name = string("op_406_split_sizes_0"), val = tensor([32, 32])]; int32 var_406_axis_0 = const()[name = string("op_406_axis_0"), val = int32(-2)]; tensor var_406_cast_fp16_0, tensor var_406_cast_fp16_1 = split(axis = var_406_axis_0, split_sizes = var_406_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_406_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_408_cast_fp16 = mul(x = var_406_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_408_cast_fp16")]; int32 var_410 = const()[name = string("op_410"), val = int32(-2)]; bool var_411_interleave_0 = const()[name = string("op_411_interleave_0"), val = bool(false)]; tensor var_411_cast_fp16 = concat(axis = var_410, interleave = var_411_interleave_0, values = (var_408_cast_fp16, var_406_cast_fp16_0))[name = string("op_411_cast_fp16")]; tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_73")]; tensor var_412_cast_fp16 = mul(x = var_411_cast_fp16, y = sin_1_cast_fp16)[name = string("op_412_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_405_cast_fp16, y = var_412_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_394_cast_fp16)[name = string("transpose_71")]; tensor var_415_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_split_sizes_0 = const()[name = string("op_416_split_sizes_0"), val = tensor([32, 32])]; int32 var_416_axis_0 = const()[name = string("op_416_axis_0"), val = int32(-1)]; tensor var_416_cast_fp16_0, tensor var_416_cast_fp16_1 = split(axis = var_416_axis_0, split_sizes = var_416_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_416_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_418_cast_fp16 = mul(x = var_416_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_418_cast_fp16")]; int32 var_420 = const()[name = string("op_420"), val = int32(-1)]; bool var_421_interleave_0 = const()[name = string("op_421_interleave_0"), val = bool(false)]; tensor var_421_cast_fp16 = concat(axis = var_420, interleave = var_421_interleave_0, values = (var_418_cast_fp16, var_416_cast_fp16_0))[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = mul(x = var_421_cast_fp16, y = sin_cast_fp16)[name = string("op_422_cast_fp16")]; tensor key_states_3_cast_fp16 = add(x = var_415_cast_fp16, y = var_422_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_426 = add(x = position_id, y = q_len_1)[name = string("op_426")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_426, concat_4_values3_0))[name = string("concat_4")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_358_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_358")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_401_cast_fp16)[name = string("transpose_70")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_359_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_359")]; tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_48)[name = string("op_465_cast_fp16")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; int32 var_468_axis_0 = const()[name = string("op_468_axis_0"), val = int32(1)]; tensor var_468_cast_fp16_0, tensor var_468_cast_fp16_1 = split(axis = var_468_axis_0, split_sizes = tile_0, x = var_465_cast_fp16)[name = string("op_468_cast_fp16")]; tensor var_475_begin_0 = const()[name = string("op_475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_475_end_0 = const()[name = string("op_475_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_475_end_mask_0 = const()[name = string("op_475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_475_cast_fp16 = slice_by_index(begin = var_475_begin_0, end = var_475_end_0, end_mask = var_475_end_mask_0, x = coreml_update_state_49)[name = string("op_475_cast_fp16")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; int32 var_478_axis_0 = const()[name = string("op_478_axis_0"), val = int32(1)]; tensor var_478_cast_fp16_0, tensor var_478_cast_fp16_1 = split(axis = var_478_axis_0, split_sizes = tile_1, x = var_475_cast_fp16)[name = string("op_478_cast_fp16")]; tensor var_481_split_sizes_0 = const()[name = string("op_481_split_sizes_0"), val = tensor([8, 8])]; int32 var_481_axis_0 = const()[name = string("op_481_axis_0"), val = int32(1)]; tensor var_481_cast_fp16_0, tensor var_481_cast_fp16_1 = split(axis = var_481_axis_0, split_sizes = var_481_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_481_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_468_cast_fp16_0, y = var_481_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_72")]; tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; int32 var_488 = const()[name = string("op_488"), val = int32(2)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_488, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool var_494_transpose_x_1 = const()[name = string("op_494_transpose_x_1"), val = bool(true)]; bool var_494_transpose_y_1 = const()[name = string("op_494_transpose_y_1"), val = bool(false)]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_1, transpose_y = var_494_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_478_cast_fp16_0)[name = string("op_494_cast_fp16")]; bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_468_cast_fp16_1, y = var_481_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; int32 var_500 = const()[name = string("op_500"), val = int32(2)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_500, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_478_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; int32 var_508 = const()[name = string("op_508"), val = int32(1)]; bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; tensor attn_output_3_cast_fp16 = concat(axis = var_508, interleave = attn_output_3_interleave_0, values = (var_494_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; tensor var_512_perm_0 = const()[name = string("op_512_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 1024, 1, 64])]; tensor var_512_cast_fp16 = transpose(perm = var_512_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_69")]; tensor x_5_cast_fp16 = reshape(shape = var_517, x = var_512_cast_fp16)[name = string("x_5_cast_fp16")]; string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_524_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; int32 var_536 = const()[name = string("op_536"), val = int32(1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_539_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_539_cast_fp16")]; bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; tensor x_9_cast_fp16 = concat(axis = var_536, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_539_cast_fp16))[name = string("x_9_cast_fp16")]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_549_to_fp16 = const()[name = string("op_549_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_549_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(1)]; tensor var_555_cast_fp16_0, tensor var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = out_9_cast_fp16)[name = string("op_555_cast_fp16")]; string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_560_to_fp16, x = var_555_cast_fp16_0)[name = string("input_1_cast_fp16")]; tensor var_571_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_571_cast_fp16")]; string var_576_pad_type_0 = const()[name = string("op_576_pad_type_0"), val = string("valid")]; tensor var_576_strides_0 = const()[name = string("op_576_strides_0"), val = tensor([1, 1])]; tensor var_576_pad_0 = const()[name = string("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_dilations_0 = const()[name = string("op_576_dilations_0"), val = tensor([1, 1])]; int32 var_576_groups_0 = const()[name = string("op_576_groups_0"), val = int32(1)]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; tensor var_576_cast_fp16 = conv(dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = var_559_to_fp16, x = var_555_cast_fp16_0)[name = string("op_576_cast_fp16")]; tensor x_15_cast_fp16 = mul(x = var_571_cast_fp16, y = var_576_cast_fp16)[name = string("x_15_cast_fp16")]; string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; tensor var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_558_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_589 = const()[name = string("op_589"), val = int32(1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_592_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_592_cast_fp16")]; bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; tensor x_19_cast_fp16 = concat(axis = var_589, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_592_cast_fp16))[name = string("x_19_cast_fp16")]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_602_to_fp16 = const()[name = string("op_602_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_602_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_608_split_sizes_0 = const()[name = string("op_608_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_608_axis_0 = const()[name = string("op_608_axis_0"), val = int32(1)]; tensor var_608_cast_fp16_0, tensor var_608_cast_fp16_1 = split(axis = var_608_axis_0, split_sizes = var_608_split_sizes_0, x = out_15_cast_fp16)[name = string("op_608_cast_fp16")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_630_to_fp16, x = var_608_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_641_to_fp16, x = var_608_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_652_to_fp16, x = var_608_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 64, 64])]; tensor embed_5_cast_fp16 = reshape(shape = var_660, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 2, 64, 64])]; tensor var_665_cast_fp16 = reshape(shape = var_664, x = key_states_5_cast_fp16)[name = string("op_665_cast_fp16")]; tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 2, 64, 64])]; tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_states_5_cast_fp16)[name = string("op_672_cast_fp16")]; tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_676_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_677_split_sizes_0 = const()[name = string("op_677_split_sizes_0"), val = tensor([32, 32])]; int32 var_677_axis_0 = const()[name = string("op_677_axis_0"), val = int32(-2)]; tensor var_677_cast_fp16_0, tensor var_677_cast_fp16_1 = split(axis = var_677_axis_0, split_sizes = var_677_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_677_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_679_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-2)]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682_cast_fp16 = concat(axis = var_681, interleave = var_682_interleave_0, values = (var_679_cast_fp16, var_677_cast_fp16_0))[name = string("op_682_cast_fp16")]; tensor var_683_cast_fp16 = mul(x = var_682_cast_fp16, y = sin_1_cast_fp16)[name = string("op_683_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_676_cast_fp16, y = var_683_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_665_cast_fp16)[name = string("transpose_68")]; tensor var_686_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_686_cast_fp16")]; tensor var_687_split_sizes_0 = const()[name = string("op_687_split_sizes_0"), val = tensor([32, 32])]; int32 var_687_axis_0 = const()[name = string("op_687_axis_0"), val = int32(-1)]; tensor var_687_cast_fp16_0, tensor var_687_cast_fp16_1 = split(axis = var_687_axis_0, split_sizes = var_687_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_687_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_689_cast_fp16 = mul(x = var_687_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_689_cast_fp16")]; int32 var_691 = const()[name = string("op_691"), val = int32(-1)]; bool var_692_interleave_0 = const()[name = string("op_692_interleave_0"), val = bool(false)]; tensor var_692_cast_fp16 = concat(axis = var_691, interleave = var_692_interleave_0, values = (var_689_cast_fp16, var_687_cast_fp16_0))[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = mul(x = var_692_cast_fp16, y = sin_cast_fp16)[name = string("op_693_cast_fp16")]; tensor key_states_7_cast_fp16 = add(x = var_686_cast_fp16, y = var_693_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_426, concat_12_values3_0))[name = string("concat_12")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_360_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_360")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_672_cast_fp16)[name = string("transpose_67")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_361_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_361")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_50)[name = string("op_736_cast_fp16")]; tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; int32 var_739_axis_0 = const()[name = string("op_739_axis_0"), val = int32(1)]; tensor var_739_cast_fp16_0, tensor var_739_cast_fp16_1 = split(axis = var_739_axis_0, split_sizes = tile_2, x = var_736_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = coreml_update_state_51)[name = string("op_746_cast_fp16")]; tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = tile_3, x = var_746_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_752_split_sizes_0 = const()[name = string("op_752_split_sizes_0"), val = tensor([8, 8])]; int32 var_752_axis_0 = const()[name = string("op_752_axis_0"), val = int32(1)]; tensor var_752_cast_fp16_0, tensor var_752_cast_fp16_1 = split(axis = var_752_axis_0, split_sizes = var_752_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_752_cast_fp16")]; bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_739_cast_fp16_0, y = var_752_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; int32 var_759 = const()[name = string("op_759"), val = int32(2)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_759, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool var_765_transpose_x_1 = const()[name = string("op_765_transpose_x_1"), val = bool(true)]; bool var_765_transpose_y_1 = const()[name = string("op_765_transpose_y_1"), val = bool(false)]; tensor var_765_cast_fp16 = matmul(transpose_x = var_765_transpose_x_1, transpose_y = var_765_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_749_cast_fp16_0)[name = string("op_765_cast_fp16")]; bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_739_cast_fp16_1, y = var_752_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; int32 var_771 = const()[name = string("op_771"), val = int32(2)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_771, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_749_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; int32 var_779 = const()[name = string("op_779"), val = int32(1)]; bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = concat(axis = var_779, interleave = attn_output_9_interleave_0, values = (var_765_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; tensor var_783_perm_0 = const()[name = string("op_783_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1024, 1, 64])]; tensor var_783_cast_fp16 = transpose(perm = var_783_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_66")]; tensor x_23_cast_fp16 = reshape(shape = var_788, x = var_783_cast_fp16)[name = string("x_23_cast_fp16")]; string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_795_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_807 = const()[name = string("op_807"), val = int32(1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; tensor x_27_cast_fp16 = concat(axis = var_807, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_810_cast_fp16))[name = string("x_27_cast_fp16")]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_820_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_826_split_sizes_0 = const()[name = string("op_826_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_826_axis_0 = const()[name = string("op_826_axis_0"), val = int32(1)]; tensor var_826_cast_fp16_0, tensor var_826_cast_fp16_1 = split(axis = var_826_axis_0, split_sizes = var_826_split_sizes_0, x = out_21_cast_fp16)[name = string("op_826_cast_fp16")]; string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_831_to_fp16, x = var_826_cast_fp16_0)[name = string("input_3_cast_fp16")]; tensor var_842_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_842_cast_fp16")]; string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")]; tensor var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor([1, 1])]; tensor var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor([1, 1])]; int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)]; tensor var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; tensor var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = var_830_to_fp16, x = var_826_cast_fp16_0)[name = string("op_847_cast_fp16")]; tensor x_33_cast_fp16 = mul(x = var_842_cast_fp16, y = var_847_cast_fp16)[name = string("x_33_cast_fp16")]; string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_829_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; int32 var_860 = const()[name = string("op_860"), val = int32(1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_863_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_863_cast_fp16")]; bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; tensor x_37_cast_fp16 = concat(axis = var_860, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_863_cast_fp16))[name = string("x_37_cast_fp16")]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_873_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(1)]; tensor var_879_cast_fp16_0, tensor var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = out_27_cast_fp16)[name = string("op_879_cast_fp16")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_901_to_fp16, x = var_879_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_912_to_fp16, x = var_879_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_923_to_fp16, x = var_879_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 16, 64, 64])]; tensor embed_9_cast_fp16 = reshape(shape = var_931, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; tensor var_935 = const()[name = string("op_935"), val = tensor([1, 2, 64, 64])]; tensor var_936_cast_fp16 = reshape(shape = var_935, x = key_states_9_cast_fp16)[name = string("op_936_cast_fp16")]; tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 2, 64, 64])]; tensor var_943_cast_fp16 = reshape(shape = var_942, x = value_states_9_cast_fp16)[name = string("op_943_cast_fp16")]; tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_947_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_947_cast_fp16")]; tensor var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor([32, 32])]; int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-2)]; tensor var_948_cast_fp16_0, tensor var_948_cast_fp16_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_948_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_950_cast_fp16 = mul(x = var_948_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_950_cast_fp16")]; int32 var_952 = const()[name = string("op_952"), val = int32(-2)]; bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)]; tensor var_953_cast_fp16 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950_cast_fp16, var_948_cast_fp16_0))[name = string("op_953_cast_fp16")]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = sin_1_cast_fp16)[name = string("op_954_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_936_cast_fp16)[name = string("transpose_65")]; tensor var_957_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_957_cast_fp16")]; tensor var_958_split_sizes_0 = const()[name = string("op_958_split_sizes_0"), val = tensor([32, 32])]; int32 var_958_axis_0 = const()[name = string("op_958_axis_0"), val = int32(-1)]; tensor var_958_cast_fp16_0, tensor var_958_cast_fp16_1 = split(axis = var_958_axis_0, split_sizes = var_958_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_958_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = var_958_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool var_963_interleave_0 = const()[name = string("op_963_interleave_0"), val = bool(false)]; tensor var_963_cast_fp16 = concat(axis = var_962, interleave = var_963_interleave_0, values = (var_960_cast_fp16, var_958_cast_fp16_0))[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = var_963_cast_fp16, y = sin_cast_fp16)[name = string("op_964_cast_fp16")]; tensor key_states_11_cast_fp16 = add(x = var_957_cast_fp16, y = var_964_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_426, concat_20_values3_0))[name = string("concat_20")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_362_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_362")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_943_cast_fp16)[name = string("transpose_64")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_363_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_363")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_52)[name = string("op_1007_cast_fp16")]; tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; int32 var_1010_axis_0 = const()[name = string("op_1010_axis_0"), val = int32(1)]; tensor var_1010_cast_fp16_0, tensor var_1010_cast_fp16_1 = split(axis = var_1010_axis_0, split_sizes = tile_4, x = var_1007_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = string("op_1017_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1017_end_0 = const()[name = string("op_1017_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1017_end_mask_0 = const()[name = string("op_1017_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = coreml_update_state_53)[name = string("op_1017_cast_fp16")]; tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; int32 var_1020_axis_0 = const()[name = string("op_1020_axis_0"), val = int32(1)]; tensor var_1020_cast_fp16_0, tensor var_1020_cast_fp16_1 = split(axis = var_1020_axis_0, split_sizes = tile_5, x = var_1017_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1023_split_sizes_0 = const()[name = string("op_1023_split_sizes_0"), val = tensor([8, 8])]; int32 var_1023_axis_0 = const()[name = string("op_1023_axis_0"), val = int32(1)]; tensor var_1023_cast_fp16_0, tensor var_1023_cast_fp16_1 = split(axis = var_1023_axis_0, split_sizes = var_1023_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_1023_cast_fp16")]; bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_1010_cast_fp16_0, y = var_1023_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; int32 var_1030 = const()[name = string("op_1030"), val = int32(2)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_1030, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool var_1036_transpose_x_1 = const()[name = string("op_1036_transpose_x_1"), val = bool(true)]; bool var_1036_transpose_y_1 = const()[name = string("op_1036_transpose_y_1"), val = bool(false)]; tensor var_1036_cast_fp16 = matmul(transpose_x = var_1036_transpose_x_1, transpose_y = var_1036_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_1020_cast_fp16_0)[name = string("op_1036_cast_fp16")]; bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_1010_cast_fp16_1, y = var_1023_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; int32 var_1042 = const()[name = string("op_1042"), val = int32(2)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_1042, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_1020_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; int32 var_1050 = const()[name = string("op_1050"), val = int32(1)]; bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; tensor attn_output_15_cast_fp16 = concat(axis = var_1050, interleave = attn_output_15_interleave_0, values = (var_1036_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; tensor var_1054_perm_0 = const()[name = string("op_1054_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1059 = const()[name = string("op_1059"), val = tensor([1, 1024, 1, 64])]; tensor var_1054_cast_fp16 = transpose(perm = var_1054_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_63")]; tensor x_41_cast_fp16 = reshape(shape = var_1059, x = var_1054_cast_fp16)[name = string("x_41_cast_fp16")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_1066_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_1078 = const()[name = string("op_1078"), val = int32(1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1081_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_1081_cast_fp16")]; bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; tensor x_45_cast_fp16 = concat(axis = var_1078, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_1081_cast_fp16))[name = string("x_45_cast_fp16")]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1091_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(1)]; tensor var_1097_cast_fp16_0, tensor var_1097_cast_fp16_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = out_33_cast_fp16)[name = string("op_1097_cast_fp16")]; string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_1102_to_fp16, x = var_1097_cast_fp16_0)[name = string("input_5_cast_fp16")]; tensor var_1113_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_1113_cast_fp16")]; string var_1118_pad_type_0 = const()[name = string("op_1118_pad_type_0"), val = string("valid")]; tensor var_1118_strides_0 = const()[name = string("op_1118_strides_0"), val = tensor([1, 1])]; tensor var_1118_pad_0 = const()[name = string("op_1118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1118_dilations_0 = const()[name = string("op_1118_dilations_0"), val = tensor([1, 1])]; int32 var_1118_groups_0 = const()[name = string("op_1118_groups_0"), val = int32(1)]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; tensor var_1118_cast_fp16 = conv(dilations = var_1118_dilations_0, groups = var_1118_groups_0, pad = var_1118_pad_0, pad_type = var_1118_pad_type_0, strides = var_1118_strides_0, weight = var_1101_to_fp16, x = var_1097_cast_fp16_0)[name = string("op_1118_cast_fp16")]; tensor x_51_cast_fp16 = mul(x = var_1113_cast_fp16, y = var_1118_cast_fp16)[name = string("x_51_cast_fp16")]; string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; tensor var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_1100_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(1)]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1134_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1134_cast_fp16")]; bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; tensor x_55_cast_fp16 = concat(axis = var_1131, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_1134_cast_fp16))[name = string("x_55_cast_fp16")]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1144_to_fp16 = const()[name = string("op_1144_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1144_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_1150_split_sizes_0 = const()[name = string("op_1150_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1150_axis_0 = const()[name = string("op_1150_axis_0"), val = int32(1)]; tensor var_1150_cast_fp16_0, tensor var_1150_cast_fp16_1 = split(axis = var_1150_axis_0, split_sizes = var_1150_split_sizes_0, x = out_39_cast_fp16)[name = string("op_1150_cast_fp16")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_1172_to_fp16, x = var_1150_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1183_to_fp16, x = var_1150_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1194_to_fp16, x = var_1150_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 16, 64, 64])]; tensor embed_13_cast_fp16 = reshape(shape = var_1202, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 2, 64, 64])]; tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = key_states_13_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, 2, 64, 64])]; tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = value_states_13_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1218_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1218_cast_fp16")]; tensor var_1219_split_sizes_0 = const()[name = string("op_1219_split_sizes_0"), val = tensor([32, 32])]; int32 var_1219_axis_0 = const()[name = string("op_1219_axis_0"), val = int32(-2)]; tensor var_1219_cast_fp16_0, tensor var_1219_cast_fp16_1 = split(axis = var_1219_axis_0, split_sizes = var_1219_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1219_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1221_cast_fp16 = mul(x = var_1219_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1221_cast_fp16")]; int32 var_1223 = const()[name = string("op_1223"), val = int32(-2)]; bool var_1224_interleave_0 = const()[name = string("op_1224_interleave_0"), val = bool(false)]; tensor var_1224_cast_fp16 = concat(axis = var_1223, interleave = var_1224_interleave_0, values = (var_1221_cast_fp16, var_1219_cast_fp16_0))[name = string("op_1224_cast_fp16")]; tensor var_1225_cast_fp16 = mul(x = var_1224_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1225_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1207_cast_fp16)[name = string("transpose_62")]; tensor var_1228_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor var_1229_split_sizes_0 = const()[name = string("op_1229_split_sizes_0"), val = tensor([32, 32])]; int32 var_1229_axis_0 = const()[name = string("op_1229_axis_0"), val = int32(-1)]; tensor var_1229_cast_fp16_0, tensor var_1229_cast_fp16_1 = split(axis = var_1229_axis_0, split_sizes = var_1229_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1229_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1231_cast_fp16 = mul(x = var_1229_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1231_cast_fp16")]; int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; bool var_1234_interleave_0 = const()[name = string("op_1234_interleave_0"), val = bool(false)]; tensor var_1234_cast_fp16 = concat(axis = var_1233, interleave = var_1234_interleave_0, values = (var_1231_cast_fp16, var_1229_cast_fp16_0))[name = string("op_1234_cast_fp16")]; tensor var_1235_cast_fp16 = mul(x = var_1234_cast_fp16, y = sin_cast_fp16)[name = string("op_1235_cast_fp16")]; tensor key_states_15_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1235_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_426, concat_28_values3_0))[name = string("concat_28")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_364_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_364")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1214_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_365_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_365")]; tensor var_1278_begin_0 = const()[name = string("op_1278_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1278_end_0 = const()[name = string("op_1278_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1278_end_mask_0 = const()[name = string("op_1278_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1278_cast_fp16 = slice_by_index(begin = var_1278_begin_0, end = var_1278_end_0, end_mask = var_1278_end_mask_0, x = coreml_update_state_54)[name = string("op_1278_cast_fp16")]; tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; int32 var_1281_axis_0 = const()[name = string("op_1281_axis_0"), val = int32(1)]; tensor var_1281_cast_fp16_0, tensor var_1281_cast_fp16_1 = split(axis = var_1281_axis_0, split_sizes = tile_6, x = var_1278_cast_fp16)[name = string("op_1281_cast_fp16")]; tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = coreml_update_state_55)[name = string("op_1288_cast_fp16")]; tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; int32 var_1291_axis_0 = const()[name = string("op_1291_axis_0"), val = int32(1)]; tensor var_1291_cast_fp16_0, tensor var_1291_cast_fp16_1 = split(axis = var_1291_axis_0, split_sizes = tile_7, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_split_sizes_0 = const()[name = string("op_1294_split_sizes_0"), val = tensor([8, 8])]; int32 var_1294_axis_0 = const()[name = string("op_1294_axis_0"), val = int32(1)]; tensor var_1294_cast_fp16_0, tensor var_1294_cast_fp16_1 = split(axis = var_1294_axis_0, split_sizes = var_1294_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1294_cast_fp16")]; bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1281_cast_fp16_0, y = var_1294_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; int32 var_1301 = const()[name = string("op_1301"), val = int32(2)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_1301, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool var_1307_transpose_x_1 = const()[name = string("op_1307_transpose_x_1"), val = bool(true)]; bool var_1307_transpose_y_1 = const()[name = string("op_1307_transpose_y_1"), val = bool(false)]; tensor var_1307_cast_fp16 = matmul(transpose_x = var_1307_transpose_x_1, transpose_y = var_1307_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1291_cast_fp16_0)[name = string("op_1307_cast_fp16")]; bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1281_cast_fp16_1, y = var_1294_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; int32 var_1313 = const()[name = string("op_1313"), val = int32(2)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_1313, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1291_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; int32 var_1321 = const()[name = string("op_1321"), val = int32(1)]; bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = concat(axis = var_1321, interleave = attn_output_21_interleave_0, values = (var_1307_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; tensor var_1325_perm_0 = const()[name = string("op_1325_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 1024, 1, 64])]; tensor var_1325_cast_fp16 = transpose(perm = var_1325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_60")]; tensor x_59_cast_fp16 = reshape(shape = var_1330, x = var_1325_cast_fp16)[name = string("x_59_cast_fp16")]; string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1337_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; int32 var_1349 = const()[name = string("op_1349"), val = int32(1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1352_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1352_cast_fp16")]; bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; tensor x_63_cast_fp16 = concat(axis = var_1349, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1352_cast_fp16))[name = string("x_63_cast_fp16")]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1362_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1368_split_sizes_0 = const()[name = string("op_1368_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1368_axis_0 = const()[name = string("op_1368_axis_0"), val = int32(1)]; tensor var_1368_cast_fp16_0, tensor var_1368_cast_fp16_1 = split(axis = var_1368_axis_0, split_sizes = var_1368_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1368_cast_fp16")]; string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; tensor var_1373_to_fp16 = const()[name = string("op_1373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1373_to_fp16, x = var_1368_cast_fp16_0)[name = string("input_7_cast_fp16")]; tensor var_1384_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1384_cast_fp16")]; string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")]; tensor var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor([1, 1])]; tensor var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor([1, 1])]; int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)]; tensor var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; tensor var_1389_cast_fp16 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = var_1372_to_fp16, x = var_1368_cast_fp16_0)[name = string("op_1389_cast_fp16")]; tensor x_69_cast_fp16 = mul(x = var_1384_cast_fp16, y = var_1389_cast_fp16)[name = string("x_69_cast_fp16")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor var_1371_to_fp16 = const()[name = string("op_1371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1371_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; int32 var_1402 = const()[name = string("op_1402"), val = int32(1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1405_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1405_cast_fp16")]; bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; tensor x_73_cast_fp16 = concat(axis = var_1402, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1405_cast_fp16))[name = string("x_73_cast_fp16")]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1415_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_1421_split_sizes_0 = const()[name = string("op_1421_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1421_axis_0 = const()[name = string("op_1421_axis_0"), val = int32(1)]; tensor var_1421_cast_fp16_0, tensor var_1421_cast_fp16_1 = split(axis = var_1421_axis_0, split_sizes = var_1421_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1421_cast_fp16")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1443_to_fp16, x = var_1421_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; tensor var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1454_to_fp16, x = var_1421_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1465_to_fp16, x = var_1421_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; tensor var_1473 = const()[name = string("op_1473"), val = tensor([1, 16, 64, 64])]; tensor embed_17_cast_fp16 = reshape(shape = var_1473, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; tensor var_1477 = const()[name = string("op_1477"), val = tensor([1, 2, 64, 64])]; tensor var_1478_cast_fp16 = reshape(shape = var_1477, x = key_states_17_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1484 = const()[name = string("op_1484"), val = tensor([1, 2, 64, 64])]; tensor var_1485_cast_fp16 = reshape(shape = var_1484, x = value_states_17_cast_fp16)[name = string("op_1485_cast_fp16")]; tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1489_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor var_1490_split_sizes_0 = const()[name = string("op_1490_split_sizes_0"), val = tensor([32, 32])]; int32 var_1490_axis_0 = const()[name = string("op_1490_axis_0"), val = int32(-2)]; tensor var_1490_cast_fp16_0, tensor var_1490_cast_fp16_1 = split(axis = var_1490_axis_0, split_sizes = var_1490_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1490_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1492_cast_fp16 = mul(x = var_1490_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-2)]; bool var_1495_interleave_0 = const()[name = string("op_1495_interleave_0"), val = bool(false)]; tensor var_1495_cast_fp16 = concat(axis = var_1494, interleave = var_1495_interleave_0, values = (var_1492_cast_fp16, var_1490_cast_fp16_0))[name = string("op_1495_cast_fp16")]; tensor var_1496_cast_fp16 = mul(x = var_1495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1496_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1478_cast_fp16)[name = string("transpose_59")]; tensor var_1499_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_split_sizes_0 = const()[name = string("op_1500_split_sizes_0"), val = tensor([32, 32])]; int32 var_1500_axis_0 = const()[name = string("op_1500_axis_0"), val = int32(-1)]; tensor var_1500_cast_fp16_0, tensor var_1500_cast_fp16_1 = split(axis = var_1500_axis_0, split_sizes = var_1500_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1500_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1502_cast_fp16 = mul(x = var_1500_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1502_cast_fp16")]; int32 var_1504 = const()[name = string("op_1504"), val = int32(-1)]; bool var_1505_interleave_0 = const()[name = string("op_1505_interleave_0"), val = bool(false)]; tensor var_1505_cast_fp16 = concat(axis = var_1504, interleave = var_1505_interleave_0, values = (var_1502_cast_fp16, var_1500_cast_fp16_0))[name = string("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = mul(x = var_1505_cast_fp16, y = sin_cast_fp16)[name = string("op_1506_cast_fp16")]; tensor key_states_19_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1506_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_426, concat_36_values3_0))[name = string("concat_36")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_366_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_366")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1485_cast_fp16)[name = string("transpose_58")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_367_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_367")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1549_end_0 = const()[name = string("op_1549_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = coreml_update_state_56)[name = string("op_1549_cast_fp16")]; tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; int32 var_1552_axis_0 = const()[name = string("op_1552_axis_0"), val = int32(1)]; tensor var_1552_cast_fp16_0, tensor var_1552_cast_fp16_1 = split(axis = var_1552_axis_0, split_sizes = tile_8, x = var_1549_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = string("op_1559_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, x = coreml_update_state_57)[name = string("op_1559_cast_fp16")]; tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(1)]; tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = tile_9, x = var_1559_cast_fp16)[name = string("op_1562_cast_fp16")]; tensor var_1565_split_sizes_0 = const()[name = string("op_1565_split_sizes_0"), val = tensor([8, 8])]; int32 var_1565_axis_0 = const()[name = string("op_1565_axis_0"), val = int32(1)]; tensor var_1565_cast_fp16_0, tensor var_1565_cast_fp16_1 = split(axis = var_1565_axis_0, split_sizes = var_1565_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1565_cast_fp16")]; bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1552_cast_fp16_0, y = var_1565_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; int32 var_1572 = const()[name = string("op_1572"), val = int32(2)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_1572, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(true)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(false)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1562_cast_fp16_0)[name = string("op_1578_cast_fp16")]; bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1552_cast_fp16_1, y = var_1565_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; int32 var_1584 = const()[name = string("op_1584"), val = int32(2)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_1584, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1562_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; int32 var_1592 = const()[name = string("op_1592"), val = int32(1)]; bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; tensor attn_output_27_cast_fp16 = concat(axis = var_1592, interleave = attn_output_27_interleave_0, values = (var_1578_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1024, 1, 64])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_57")]; tensor x_77_cast_fp16 = reshape(shape = var_1601, x = var_1596_cast_fp16)[name = string("x_77_cast_fp16")]; string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1608_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; int32 var_1620 = const()[name = string("op_1620"), val = int32(1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1623_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1623_cast_fp16")]; bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; tensor x_81_cast_fp16 = concat(axis = var_1620, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1623_cast_fp16))[name = string("x_81_cast_fp16")]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1633_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1639_split_sizes_0 = const()[name = string("op_1639_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1639_axis_0 = const()[name = string("op_1639_axis_0"), val = int32(1)]; tensor var_1639_cast_fp16_0, tensor var_1639_cast_fp16_1 = split(axis = var_1639_axis_0, split_sizes = var_1639_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1639_cast_fp16")]; string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1644_to_fp16, x = var_1639_cast_fp16_0)[name = string("input_9_cast_fp16")]; tensor var_1655_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1655_cast_fp16")]; string var_1660_pad_type_0 = const()[name = string("op_1660_pad_type_0"), val = string("valid")]; tensor var_1660_strides_0 = const()[name = string("op_1660_strides_0"), val = tensor([1, 1])]; tensor var_1660_pad_0 = const()[name = string("op_1660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1660_dilations_0 = const()[name = string("op_1660_dilations_0"), val = tensor([1, 1])]; int32 var_1660_groups_0 = const()[name = string("op_1660_groups_0"), val = int32(1)]; tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; tensor var_1660_cast_fp16 = conv(dilations = var_1660_dilations_0, groups = var_1660_groups_0, pad = var_1660_pad_0, pad_type = var_1660_pad_type_0, strides = var_1660_strides_0, weight = var_1643_to_fp16, x = var_1639_cast_fp16_0)[name = string("op_1660_cast_fp16")]; tensor x_87_cast_fp16 = mul(x = var_1655_cast_fp16, y = var_1660_cast_fp16)[name = string("x_87_cast_fp16")]; string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; tensor var_1642_to_fp16 = const()[name = string("op_1642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1642_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; int32 var_1673 = const()[name = string("op_1673"), val = int32(1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1676_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1676_cast_fp16")]; bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; tensor x_91_cast_fp16 = concat(axis = var_1673, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1676_cast_fp16))[name = string("x_91_cast_fp16")]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1686_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_1692_split_sizes_0 = const()[name = string("op_1692_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1692_axis_0 = const()[name = string("op_1692_axis_0"), val = int32(1)]; tensor var_1692_cast_fp16_0, tensor var_1692_cast_fp16_1 = split(axis = var_1692_axis_0, split_sizes = var_1692_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1692_cast_fp16")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor var_1714_to_fp16 = const()[name = string("op_1714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1714_to_fp16, x = var_1692_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1725_to_fp16, x = var_1692_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; tensor var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1736_to_fp16, x = var_1692_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; tensor var_1744 = const()[name = string("op_1744"), val = tensor([1, 16, 64, 64])]; tensor embed_21_cast_fp16 = reshape(shape = var_1744, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 2, 64, 64])]; tensor var_1749_cast_fp16 = reshape(shape = var_1748, x = key_states_21_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor embed_23_perm_0 = const()[name = string("embed_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 2, 64, 64])]; tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_states_21_cast_fp16)[name = string("op_1756_cast_fp16")]; tensor value_states_23_perm_0 = const()[name = string("value_states_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1760_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([32, 32])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-2)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1761_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1763_cast_fp16 = mul(x = var_1761_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1763_cast_fp16")]; int32 var_1765 = const()[name = string("op_1765"), val = int32(-2)]; bool var_1766_interleave_0 = const()[name = string("op_1766_interleave_0"), val = bool(false)]; tensor var_1766_cast_fp16 = concat(axis = var_1765, interleave = var_1766_interleave_0, values = (var_1763_cast_fp16, var_1761_cast_fp16_0))[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = var_1766_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1760_cast_fp16, y = var_1767_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor embed_23_cast_fp16 = transpose(perm = embed_23_perm_0, x = var_1749_cast_fp16)[name = string("transpose_56")]; tensor var_1770_cast_fp16 = mul(x = embed_23_cast_fp16, y = cos_cast_fp16)[name = string("op_1770_cast_fp16")]; tensor var_1771_split_sizes_0 = const()[name = string("op_1771_split_sizes_0"), val = tensor([32, 32])]; int32 var_1771_axis_0 = const()[name = string("op_1771_axis_0"), val = int32(-1)]; tensor var_1771_cast_fp16_0, tensor var_1771_cast_fp16_1 = split(axis = var_1771_axis_0, split_sizes = var_1771_split_sizes_0, x = embed_23_cast_fp16)[name = string("op_1771_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1773_cast_fp16 = mul(x = var_1771_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1773_cast_fp16")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool var_1776_interleave_0 = const()[name = string("op_1776_interleave_0"), val = bool(false)]; tensor var_1776_cast_fp16 = concat(axis = var_1775, interleave = var_1776_interleave_0, values = (var_1773_cast_fp16, var_1771_cast_fp16_0))[name = string("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = mul(x = var_1776_cast_fp16, y = sin_cast_fp16)[name = string("op_1777_cast_fp16")]; tensor key_states_23_cast_fp16 = add(x = var_1770_cast_fp16, y = var_1777_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_426, concat_44_values3_0))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_368_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_368")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_23_cast_fp16 = transpose(perm = value_states_23_perm_0, x = var_1756_cast_fp16)[name = string("transpose_55")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_23_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_369_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_369")]; tensor var_1820_begin_0 = const()[name = string("op_1820_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1820_end_0 = const()[name = string("op_1820_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1820_end_mask_0 = const()[name = string("op_1820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1820_cast_fp16 = slice_by_index(begin = var_1820_begin_0, end = var_1820_end_0, end_mask = var_1820_end_mask_0, x = coreml_update_state_58)[name = string("op_1820_cast_fp16")]; tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; int32 var_1823_axis_0 = const()[name = string("op_1823_axis_0"), val = int32(1)]; tensor var_1823_cast_fp16_0, tensor var_1823_cast_fp16_1 = split(axis = var_1823_axis_0, split_sizes = tile_10, x = var_1820_cast_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_59)[name = string("op_1830_cast_fp16")]; tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; int32 var_1833_axis_0 = const()[name = string("op_1833_axis_0"), val = int32(1)]; tensor var_1833_cast_fp16_0, tensor var_1833_cast_fp16_1 = split(axis = var_1833_axis_0, split_sizes = tile_11, x = var_1830_cast_fp16)[name = string("op_1833_cast_fp16")]; tensor var_1836_split_sizes_0 = const()[name = string("op_1836_split_sizes_0"), val = tensor([8, 8])]; int32 var_1836_axis_0 = const()[name = string("op_1836_axis_0"), val = int32(1)]; tensor var_1836_cast_fp16_0, tensor var_1836_cast_fp16_1 = split(axis = var_1836_axis_0, split_sizes = var_1836_split_sizes_0, x = query_states_23_cast_fp16)[name = string("op_1836_cast_fp16")]; bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1823_cast_fp16_0, y = var_1836_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; int32 var_1843 = const()[name = string("op_1843"), val = int32(2)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_1843, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool var_1849_transpose_x_1 = const()[name = string("op_1849_transpose_x_1"), val = bool(true)]; bool var_1849_transpose_y_1 = const()[name = string("op_1849_transpose_y_1"), val = bool(false)]; tensor var_1849_cast_fp16 = matmul(transpose_x = var_1849_transpose_x_1, transpose_y = var_1849_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1833_cast_fp16_0)[name = string("op_1849_cast_fp16")]; bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1823_cast_fp16_1, y = var_1836_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; int32 var_1855 = const()[name = string("op_1855"), val = int32(2)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_1855, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_95_cast_fp16, y = var_1833_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; int32 var_1863 = const()[name = string("op_1863"), val = int32(1)]; bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = concat(axis = var_1863, interleave = attn_output_33_interleave_0, values = (var_1849_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; tensor var_1867_perm_0 = const()[name = string("op_1867_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, 1024, 1, 64])]; tensor var_1867_cast_fp16 = transpose(perm = var_1867_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_54")]; tensor x_95_cast_fp16 = reshape(shape = var_1872, x = var_1867_cast_fp16)[name = string("x_95_cast_fp16")]; string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; tensor var_1879_to_fp16 = const()[name = string("op_1879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1879_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_1891 = const()[name = string("op_1891"), val = int32(1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; tensor x_99_cast_fp16 = concat(axis = var_1891, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1894_cast_fp16))[name = string("x_99_cast_fp16")]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1904_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1910_split_sizes_0 = const()[name = string("op_1910_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1910_axis_0 = const()[name = string("op_1910_axis_0"), val = int32(1)]; tensor var_1910_cast_fp16_0, tensor var_1910_cast_fp16_1 = split(axis = var_1910_axis_0, split_sizes = var_1910_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1910_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; tensor input_11_cast_fp16 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = var_1915_to_fp16, x = var_1910_cast_fp16_0)[name = string("input_11_cast_fp16")]; tensor var_1926_cast_fp16 = silu(x = input_11_cast_fp16)[name = string("op_1926_cast_fp16")]; string var_1931_pad_type_0 = const()[name = string("op_1931_pad_type_0"), val = string("valid")]; tensor var_1931_strides_0 = const()[name = string("op_1931_strides_0"), val = tensor([1, 1])]; tensor var_1931_pad_0 = const()[name = string("op_1931_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1931_dilations_0 = const()[name = string("op_1931_dilations_0"), val = tensor([1, 1])]; int32 var_1931_groups_0 = const()[name = string("op_1931_groups_0"), val = int32(1)]; tensor var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; tensor var_1931_cast_fp16 = conv(dilations = var_1931_dilations_0, groups = var_1931_groups_0, pad = var_1931_pad_0, pad_type = var_1931_pad_type_0, strides = var_1931_strides_0, weight = var_1914_to_fp16, x = var_1910_cast_fp16_0)[name = string("op_1931_cast_fp16")]; tensor x_105_cast_fp16 = mul(x = var_1926_cast_fp16, y = var_1931_cast_fp16)[name = string("x_105_cast_fp16")]; string hidden_states_35_pad_type_0 = const()[name = string("hidden_states_35_pad_type_0"), val = string("valid")]; tensor hidden_states_35_strides_0 = const()[name = string("hidden_states_35_strides_0"), val = tensor([1, 1])]; tensor hidden_states_35_pad_0 = const()[name = string("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_35_dilations_0 = const()[name = string("hidden_states_35_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_35_groups_0 = const()[name = string("hidden_states_35_groups_0"), val = int32(1)]; tensor var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; tensor hidden_states_35_cast_fp16 = conv(dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = var_1913_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1947_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1947_cast_fp16")]; bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; tensor x_109_cast_fp16 = concat(axis = var_1944, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1947_cast_fp16))[name = string("x_109_cast_fp16")]; tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; fp16 var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1957_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; tensor layer_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_layers_6_input_layernorm_weight_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_1963_split_sizes_0 = const()[name = string("op_1963_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1963_axis_0 = const()[name = string("op_1963_axis_0"), val = int32(1)]; tensor var_1963_cast_fp16_0, tensor var_1963_cast_fp16_1 = split(axis = var_1963_axis_0, split_sizes = var_1963_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1963_cast_fp16")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; tensor query_states_25_cast_fp16 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = var_1985_to_fp16, x = var_1963_cast_fp16_0)[name = string("query_states_25_cast_fp16")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor var_1996_to_fp16 = const()[name = string("op_1996_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189861696)))]; tensor key_states_25_cast_fp16 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = var_1996_to_fp16, x = var_1963_cast_fp16_0)[name = string("key_states_25_cast_fp16")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190123904)))]; tensor value_states_25_cast_fp16 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = var_2007_to_fp16, x = var_1963_cast_fp16_0)[name = string("value_states_25_cast_fp16")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 16, 64, 64])]; tensor embed_25_cast_fp16 = reshape(shape = var_2015, x = query_states_25_cast_fp16)[name = string("embed_25_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 2, 64, 64])]; tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = key_states_25_cast_fp16)[name = string("op_2020_cast_fp16")]; tensor embed_27_perm_0 = const()[name = string("embed_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2026 = const()[name = string("op_2026"), val = tensor([1, 2, 64, 64])]; tensor var_2027_cast_fp16 = reshape(shape = var_2026, x = value_states_25_cast_fp16)[name = string("op_2027_cast_fp16")]; tensor value_states_27_perm_0 = const()[name = string("value_states_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2031_cast_fp16 = mul(x = embed_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_split_sizes_0 = const()[name = string("op_2032_split_sizes_0"), val = tensor([32, 32])]; int32 var_2032_axis_0 = const()[name = string("op_2032_axis_0"), val = int32(-2)]; tensor var_2032_cast_fp16_0, tensor var_2032_cast_fp16_1 = split(axis = var_2032_axis_0, split_sizes = var_2032_split_sizes_0, x = embed_25_cast_fp16)[name = string("op_2032_cast_fp16")]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2034_cast_fp16 = mul(x = var_2032_cast_fp16_1, y = const_67_promoted_to_fp16)[name = string("op_2034_cast_fp16")]; int32 var_2036 = const()[name = string("op_2036"), val = int32(-2)]; bool var_2037_interleave_0 = const()[name = string("op_2037_interleave_0"), val = bool(false)]; tensor var_2037_cast_fp16 = concat(axis = var_2036, interleave = var_2037_interleave_0, values = (var_2034_cast_fp16, var_2032_cast_fp16_0))[name = string("op_2037_cast_fp16")]; tensor var_2038_cast_fp16 = mul(x = var_2037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2038_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor embed_27_cast_fp16 = transpose(perm = embed_27_perm_0, x = var_2020_cast_fp16)[name = string("transpose_53")]; tensor var_2041_cast_fp16 = mul(x = embed_27_cast_fp16, y = cos_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2042_split_sizes_0 = const()[name = string("op_2042_split_sizes_0"), val = tensor([32, 32])]; int32 var_2042_axis_0 = const()[name = string("op_2042_axis_0"), val = int32(-1)]; tensor var_2042_cast_fp16_0, tensor var_2042_cast_fp16_1 = split(axis = var_2042_axis_0, split_sizes = var_2042_split_sizes_0, x = embed_27_cast_fp16)[name = string("op_2042_cast_fp16")]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = var_2042_cast_fp16_1, y = const_68_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; bool var_2047_interleave_0 = const()[name = string("op_2047_interleave_0"), val = bool(false)]; tensor var_2047_cast_fp16 = concat(axis = var_2046, interleave = var_2047_interleave_0, values = (var_2044_cast_fp16, var_2042_cast_fp16_0))[name = string("op_2047_cast_fp16")]; tensor var_2048_cast_fp16 = mul(x = var_2047_cast_fp16, y = sin_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor key_states_27_cast_fp16 = add(x = var_2041_cast_fp16, y = var_2048_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([6])]; tensor expand_dims_62 = const()[name = string("expand_dims_62"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([7])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_61, expand_dims_62, position_id, concat_51_values3_0))[name = string("concat_51")]; tensor concat_52_values1_0 = const()[name = string("concat_52_values1_0"), val = tensor([0])]; tensor concat_52_values3_0 = const()[name = string("concat_52_values3_0"), val = tensor([0])]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (expand_dims_64, concat_52_values1_0, var_426, concat_52_values3_0))[name = string("concat_52")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = key_states_27_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_370_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_370")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27_cast_fp16 = transpose(perm = value_states_27_perm_0, x = var_2027_cast_fp16)[name = string("transpose_52")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = value_states_27_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_371_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_371")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = coreml_update_state_60)[name = string("op_2091_cast_fp16")]; tensor tile_12 = const()[name = string("tile_12"), val = tensor([1, 1])]; int32 var_2094_axis_0 = const()[name = string("op_2094_axis_0"), val = int32(1)]; tensor var_2094_cast_fp16_0, tensor var_2094_cast_fp16_1 = split(axis = var_2094_axis_0, split_sizes = tile_12, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = string("op_2101_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = string("op_2101_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2101_end_mask_0 = const()[name = string("op_2101_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = coreml_update_state_61)[name = string("op_2101_cast_fp16")]; tensor tile_13 = const()[name = string("tile_13"), val = tensor([1, 1])]; int32 var_2104_axis_0 = const()[name = string("op_2104_axis_0"), val = int32(1)]; tensor var_2104_cast_fp16_0, tensor var_2104_cast_fp16_1 = split(axis = var_2104_axis_0, split_sizes = tile_13, x = var_2101_cast_fp16)[name = string("op_2104_cast_fp16")]; tensor var_2107_split_sizes_0 = const()[name = string("op_2107_split_sizes_0"), val = tensor([8, 8])]; int32 var_2107_axis_0 = const()[name = string("op_2107_axis_0"), val = int32(1)]; tensor var_2107_cast_fp16_0, tensor var_2107_cast_fp16_1 = split(axis = var_2107_axis_0, split_sizes = var_2107_split_sizes_0, x = query_states_27_cast_fp16)[name = string("op_2107_cast_fp16")]; bool attn_weights_97_transpose_x_0 = const()[name = string("attn_weights_97_transpose_x_0"), val = bool(false)]; bool attn_weights_97_transpose_y_0 = const()[name = string("attn_weights_97_transpose_y_0"), val = bool(false)]; tensor attn_weights_97_cast_fp16 = matmul(transpose_x = attn_weights_97_transpose_x_0, transpose_y = attn_weights_97_transpose_y_0, x = var_2094_cast_fp16_0, y = var_2107_cast_fp16_0)[name = string("attn_weights_97_cast_fp16")]; fp16 _inversed_attn_weights_99_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_99_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_99_cast_fp16 = mul(x = attn_weights_97_cast_fp16, y = _inversed_attn_weights_99_y_0_to_fp16)[name = string("_inversed_attn_weights_99_cast_fp16")]; tensor attn_weights_101_cast_fp16 = add(x = _inversed_attn_weights_99_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; int32 var_2114 = const()[name = string("op_2114"), val = int32(2)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_2114, x = attn_weights_101_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool var_2120_transpose_x_1 = const()[name = string("op_2120_transpose_x_1"), val = bool(true)]; bool var_2120_transpose_y_1 = const()[name = string("op_2120_transpose_y_1"), val = bool(false)]; tensor var_2120_cast_fp16 = matmul(transpose_x = var_2120_transpose_x_1, transpose_y = var_2120_transpose_y_1, x = attn_weights_103_cast_fp16, y = var_2104_cast_fp16_0)[name = string("op_2120_cast_fp16")]; bool attn_weights_105_transpose_x_0 = const()[name = string("attn_weights_105_transpose_x_0"), val = bool(false)]; bool attn_weights_105_transpose_y_0 = const()[name = string("attn_weights_105_transpose_y_0"), val = bool(false)]; tensor attn_weights_105_cast_fp16 = matmul(transpose_x = attn_weights_105_transpose_x_0, transpose_y = attn_weights_105_transpose_y_0, x = var_2094_cast_fp16_1, y = var_2107_cast_fp16_1)[name = string("attn_weights_105_cast_fp16")]; fp16 _inversed_attn_weights_107_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_107_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_107_cast_fp16 = mul(x = attn_weights_105_cast_fp16, y = _inversed_attn_weights_107_y_0_to_fp16)[name = string("_inversed_attn_weights_107_cast_fp16")]; tensor attn_weights_109_cast_fp16 = add(x = _inversed_attn_weights_107_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_109_cast_fp16")]; int32 var_2126 = const()[name = string("op_2126"), val = int32(2)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_2126, x = attn_weights_109_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(true)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_111_cast_fp16, y = var_2104_cast_fp16_1)[name = string("attn_output_37_cast_fp16")]; int32 var_2134 = const()[name = string("op_2134"), val = int32(1)]; bool attn_output_39_interleave_0 = const()[name = string("attn_output_39_interleave_0"), val = bool(false)]; tensor attn_output_39_cast_fp16 = concat(axis = var_2134, interleave = attn_output_39_interleave_0, values = (var_2120_cast_fp16, attn_output_37_cast_fp16))[name = string("attn_output_39_cast_fp16")]; tensor var_2138_perm_0 = const()[name = string("op_2138_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1024, 1, 64])]; tensor var_2138_cast_fp16 = transpose(perm = var_2138_perm_0, x = attn_output_39_cast_fp16)[name = string("transpose_51")]; tensor x_113_cast_fp16 = reshape(shape = var_2143, x = var_2138_cast_fp16)[name = string("x_113_cast_fp16")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386112)))]; tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = var_2150_to_fp16, x = x_113_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor x_115_cast_fp16 = add(x = x_107_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("x_115_cast_fp16")]; int32 var_2162 = const()[name = string("op_2162"), val = int32(1)]; fp16 const_73_promoted_to_fp16 = const()[name = string("const_73_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2165_cast_fp16 = mul(x = x_115_cast_fp16, y = const_73_promoted_to_fp16)[name = string("op_2165_cast_fp16")]; bool x_117_interleave_0 = const()[name = string("x_117_interleave_0"), val = bool(false)]; tensor x_117_cast_fp16 = concat(axis = var_2162, interleave = x_117_interleave_0, values = (x_115_cast_fp16, var_2165_cast_fp16))[name = string("x_117_cast_fp16")]; tensor out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor([1])]; fp16 var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2175_to_fp16, x = x_117_cast_fp16)[name = string("out_79_cast_fp16")]; tensor layer_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192483328)))]; tensor out_81_cast_fp16 = mul(x = out_79_cast_fp16, y = layer_layers_6_post_attention_layernorm_weight_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_2181_split_sizes_0 = const()[name = string("op_2181_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2181_axis_0 = const()[name = string("op_2181_axis_0"), val = int32(1)]; tensor var_2181_cast_fp16_0, tensor var_2181_cast_fp16_1 = split(axis = var_2181_axis_0, split_sizes = var_2181_split_sizes_0, x = out_81_cast_fp16)[name = string("op_2181_cast_fp16")]; string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192487488)))]; tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = var_2186_to_fp16, x = var_2181_cast_fp16_0)[name = string("input_13_cast_fp16")]; tensor var_2197_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_2197_cast_fp16")]; string var_2202_pad_type_0 = const()[name = string("op_2202_pad_type_0"), val = string("valid")]; tensor var_2202_strides_0 = const()[name = string("op_2202_strides_0"), val = tensor([1, 1])]; tensor var_2202_pad_0 = const()[name = string("op_2202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2202_dilations_0 = const()[name = string("op_2202_dilations_0"), val = tensor([1, 1])]; int32 var_2202_groups_0 = const()[name = string("op_2202_groups_0"), val = int32(1)]; tensor var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200876160)))]; tensor var_2202_cast_fp16 = conv(dilations = var_2202_dilations_0, groups = var_2202_groups_0, pad = var_2202_pad_0, pad_type = var_2202_pad_type_0, strides = var_2202_strides_0, weight = var_2185_to_fp16, x = var_2181_cast_fp16_0)[name = string("op_2202_cast_fp16")]; tensor x_123_cast_fp16 = mul(x = var_2197_cast_fp16, y = var_2202_cast_fp16)[name = string("x_123_cast_fp16")]; string hidden_states_41_pad_type_0 = const()[name = string("hidden_states_41_pad_type_0"), val = string("valid")]; tensor hidden_states_41_strides_0 = const()[name = string("hidden_states_41_strides_0"), val = tensor([1, 1])]; tensor hidden_states_41_pad_0 = const()[name = string("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_41_dilations_0 = const()[name = string("hidden_states_41_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_41_groups_0 = const()[name = string("hidden_states_41_groups_0"), val = int32(1)]; tensor var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209264832)))]; tensor hidden_states_41_cast_fp16 = conv(dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = var_2184_to_fp16, x = x_123_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor x_125_cast_fp16 = add(x = x_115_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("x_125_cast_fp16")]; int32 var_2215 = const()[name = string("op_2215"), val = int32(1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_125_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool x_127_interleave_0 = const()[name = string("x_127_interleave_0"), val = bool(false)]; tensor x_127_cast_fp16 = concat(axis = var_2215, interleave = x_127_interleave_0, values = (x_125_cast_fp16, var_2218_cast_fp16))[name = string("x_127_cast_fp16")]; tensor out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor([1])]; fp16 var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2228_to_fp16, x = x_127_cast_fp16)[name = string("out_85_cast_fp16")]; tensor layer_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217653504)))]; tensor out_87_cast_fp16 = mul(x = out_85_cast_fp16, y = layer_layers_7_input_layernorm_weight_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_2234_split_sizes_0 = const()[name = string("op_2234_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2234_axis_0 = const()[name = string("op_2234_axis_0"), val = int32(1)]; tensor var_2234_cast_fp16_0, tensor var_2234_cast_fp16_1 = split(axis = var_2234_axis_0, split_sizes = var_2234_split_sizes_0, x = out_87_cast_fp16)[name = string("op_2234_cast_fp16")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217657664)))]; tensor query_states_29_cast_fp16 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = var_2256_to_fp16, x = var_2234_cast_fp16_0)[name = string("query_states_29_cast_fp16")]; string key_states_29_pad_type_0 = const()[name = string("key_states_29_pad_type_0"), val = string("valid")]; tensor key_states_29_strides_0 = const()[name = string("key_states_29_strides_0"), val = tensor([1, 1])]; tensor key_states_29_pad_0 = const()[name = string("key_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_29_dilations_0 = const()[name = string("key_states_29_dilations_0"), val = tensor([1, 1])]; int32 key_states_29_groups_0 = const()[name = string("key_states_29_groups_0"), val = int32(1)]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754880)))]; tensor key_states_29_cast_fp16 = conv(dilations = key_states_29_dilations_0, groups = key_states_29_groups_0, pad = key_states_29_pad_0, pad_type = key_states_29_pad_type_0, strides = key_states_29_strides_0, weight = var_2267_to_fp16, x = var_2234_cast_fp16_0)[name = string("key_states_29_cast_fp16")]; string value_states_29_pad_type_0 = const()[name = string("value_states_29_pad_type_0"), val = string("valid")]; tensor value_states_29_strides_0 = const()[name = string("value_states_29_strides_0"), val = tensor([1, 1])]; tensor value_states_29_pad_0 = const()[name = string("value_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_29_dilations_0 = const()[name = string("value_states_29_dilations_0"), val = tensor([1, 1])]; int32 value_states_29_groups_0 = const()[name = string("value_states_29_groups_0"), val = int32(1)]; tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220017088)))]; tensor value_states_29_cast_fp16 = conv(dilations = value_states_29_dilations_0, groups = value_states_29_groups_0, pad = value_states_29_pad_0, pad_type = value_states_29_pad_type_0, strides = value_states_29_strides_0, weight = var_2278_to_fp16, x = var_2234_cast_fp16_0)[name = string("value_states_29_cast_fp16")]; tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 16, 64, 64])]; tensor embed_29_cast_fp16 = reshape(shape = var_2286, x = query_states_29_cast_fp16)[name = string("embed_29_cast_fp16")]; tensor var_2290 = const()[name = string("op_2290"), val = tensor([1, 2, 64, 64])]; tensor var_2291_cast_fp16 = reshape(shape = var_2290, x = key_states_29_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor embed_31_perm_0 = const()[name = string("embed_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([1, 2, 64, 64])]; tensor var_2298_cast_fp16 = reshape(shape = var_2297, x = value_states_29_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor value_states_31_perm_0 = const()[name = string("value_states_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_cast_fp16 = mul(x = embed_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2302_cast_fp16")]; tensor var_2303_split_sizes_0 = const()[name = string("op_2303_split_sizes_0"), val = tensor([32, 32])]; int32 var_2303_axis_0 = const()[name = string("op_2303_axis_0"), val = int32(-2)]; tensor var_2303_cast_fp16_0, tensor var_2303_cast_fp16_1 = split(axis = var_2303_axis_0, split_sizes = var_2303_split_sizes_0, x = embed_29_cast_fp16)[name = string("op_2303_cast_fp16")]; fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2305_cast_fp16 = mul(x = var_2303_cast_fp16_1, y = const_77_promoted_to_fp16)[name = string("op_2305_cast_fp16")]; int32 var_2307 = const()[name = string("op_2307"), val = int32(-2)]; bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; tensor var_2308_cast_fp16 = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (var_2305_cast_fp16, var_2303_cast_fp16_0))[name = string("op_2308_cast_fp16")]; tensor var_2309_cast_fp16 = mul(x = var_2308_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_2302_cast_fp16, y = var_2309_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor embed_31_cast_fp16 = transpose(perm = embed_31_perm_0, x = var_2291_cast_fp16)[name = string("transpose_50")]; tensor var_2312_cast_fp16 = mul(x = embed_31_cast_fp16, y = cos_cast_fp16)[name = string("op_2312_cast_fp16")]; tensor var_2313_split_sizes_0 = const()[name = string("op_2313_split_sizes_0"), val = tensor([32, 32])]; int32 var_2313_axis_0 = const()[name = string("op_2313_axis_0"), val = int32(-1)]; tensor var_2313_cast_fp16_0, tensor var_2313_cast_fp16_1 = split(axis = var_2313_axis_0, split_sizes = var_2313_split_sizes_0, x = embed_31_cast_fp16)[name = string("op_2313_cast_fp16")]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2315_cast_fp16 = mul(x = var_2313_cast_fp16_1, y = const_78_promoted_to_fp16)[name = string("op_2315_cast_fp16")]; int32 var_2317 = const()[name = string("op_2317"), val = int32(-1)]; bool var_2318_interleave_0 = const()[name = string("op_2318_interleave_0"), val = bool(false)]; tensor var_2318_cast_fp16 = concat(axis = var_2317, interleave = var_2318_interleave_0, values = (var_2315_cast_fp16, var_2313_cast_fp16_0))[name = string("op_2318_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = var_2318_cast_fp16, y = sin_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor key_states_31_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2319_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor expand_dims_71 = const()[name = string("expand_dims_71"), val = tensor([7])]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_74 = const()[name = string("expand_dims_74"), val = tensor([8])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_71, expand_dims_72, position_id, concat_59_values3_0))[name = string("concat_59")]; tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_74, concat_60_values1_0, var_426, concat_60_values3_0))[name = string("concat_60")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = key_states_31_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_372_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_372")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_31_cast_fp16 = transpose(perm = value_states_31_perm_0, x = var_2298_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = value_states_31_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_373_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_373")]; tensor var_2362_begin_0 = const()[name = string("op_2362_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2362_end_0 = const()[name = string("op_2362_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2362_end_mask_0 = const()[name = string("op_2362_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = coreml_update_state_62)[name = string("op_2362_cast_fp16")]; tensor tile_14 = const()[name = string("tile_14"), val = tensor([1, 1])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = tile_14, x = var_2362_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2372_begin_0 = const()[name = string("op_2372_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2372_end_0 = const()[name = string("op_2372_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2372_end_mask_0 = const()[name = string("op_2372_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2372_cast_fp16 = slice_by_index(begin = var_2372_begin_0, end = var_2372_end_0, end_mask = var_2372_end_mask_0, x = coreml_update_state_63)[name = string("op_2372_cast_fp16")]; tensor tile_15 = const()[name = string("tile_15"), val = tensor([1, 1])]; int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(1)]; tensor var_2375_cast_fp16_0, tensor var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = tile_15, x = var_2372_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2378_split_sizes_0 = const()[name = string("op_2378_split_sizes_0"), val = tensor([8, 8])]; int32 var_2378_axis_0 = const()[name = string("op_2378_axis_0"), val = int32(1)]; tensor var_2378_cast_fp16_0, tensor var_2378_cast_fp16_1 = split(axis = var_2378_axis_0, split_sizes = var_2378_split_sizes_0, x = query_states_31_cast_fp16)[name = string("op_2378_cast_fp16")]; bool attn_weights_113_transpose_x_0 = const()[name = string("attn_weights_113_transpose_x_0"), val = bool(false)]; bool attn_weights_113_transpose_y_0 = const()[name = string("attn_weights_113_transpose_y_0"), val = bool(false)]; tensor attn_weights_113_cast_fp16 = matmul(transpose_x = attn_weights_113_transpose_x_0, transpose_y = attn_weights_113_transpose_y_0, x = var_2365_cast_fp16_0, y = var_2378_cast_fp16_0)[name = string("attn_weights_113_cast_fp16")]; fp16 _inversed_attn_weights_115_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_115_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_115_cast_fp16 = mul(x = attn_weights_113_cast_fp16, y = _inversed_attn_weights_115_y_0_to_fp16)[name = string("_inversed_attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = _inversed_attn_weights_115_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; int32 var_2385 = const()[name = string("op_2385"), val = int32(2)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_2385, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool var_2391_transpose_x_1 = const()[name = string("op_2391_transpose_x_1"), val = bool(true)]; bool var_2391_transpose_y_1 = const()[name = string("op_2391_transpose_y_1"), val = bool(false)]; tensor var_2391_cast_fp16 = matmul(transpose_x = var_2391_transpose_x_1, transpose_y = var_2391_transpose_y_1, x = attn_weights_119_cast_fp16, y = var_2375_cast_fp16_0)[name = string("op_2391_cast_fp16")]; bool attn_weights_121_transpose_x_0 = const()[name = string("attn_weights_121_transpose_x_0"), val = bool(false)]; bool attn_weights_121_transpose_y_0 = const()[name = string("attn_weights_121_transpose_y_0"), val = bool(false)]; tensor attn_weights_121_cast_fp16 = matmul(transpose_x = attn_weights_121_transpose_x_0, transpose_y = attn_weights_121_transpose_y_0, x = var_2365_cast_fp16_1, y = var_2378_cast_fp16_1)[name = string("attn_weights_121_cast_fp16")]; fp16 _inversed_attn_weights_123_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_123_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_123_cast_fp16 = mul(x = attn_weights_121_cast_fp16, y = _inversed_attn_weights_123_y_0_to_fp16)[name = string("_inversed_attn_weights_123_cast_fp16")]; tensor attn_weights_125_cast_fp16 = add(x = _inversed_attn_weights_123_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(2)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_2397, x = attn_weights_125_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_43_transpose_x_1 = const()[name = string("attn_output_43_transpose_x_1"), val = bool(true)]; bool attn_output_43_transpose_y_1 = const()[name = string("attn_output_43_transpose_y_1"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_1, transpose_y = attn_output_43_transpose_y_1, x = attn_weights_127_cast_fp16, y = var_2375_cast_fp16_1)[name = string("attn_output_43_cast_fp16")]; int32 var_2405 = const()[name = string("op_2405"), val = int32(1)]; bool attn_output_45_interleave_0 = const()[name = string("attn_output_45_interleave_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = concat(axis = var_2405, interleave = attn_output_45_interleave_0, values = (var_2391_cast_fp16, attn_output_43_cast_fp16))[name = string("attn_output_45_cast_fp16")]; tensor var_2409_perm_0 = const()[name = string("op_2409_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2414 = const()[name = string("op_2414"), val = tensor([1, 1024, 1, 64])]; tensor var_2409_cast_fp16 = transpose(perm = var_2409_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor x_131_cast_fp16 = reshape(shape = var_2414, x = var_2409_cast_fp16)[name = string("x_131_cast_fp16")]; string hidden_states_45_pad_type_0 = const()[name = string("hidden_states_45_pad_type_0"), val = string("valid")]; tensor hidden_states_45_strides_0 = const()[name = string("hidden_states_45_strides_0"), val = tensor([1, 1])]; tensor hidden_states_45_pad_0 = const()[name = string("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_45_dilations_0 = const()[name = string("hidden_states_45_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_45_groups_0 = const()[name = string("hidden_states_45_groups_0"), val = int32(1)]; tensor var_2421_to_fp16 = const()[name = string("op_2421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279296)))]; tensor hidden_states_45_cast_fp16 = conv(dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = var_2421_to_fp16, x = x_131_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor x_133_cast_fp16 = add(x = x_125_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("x_133_cast_fp16")]; int32 var_2433 = const()[name = string("op_2433"), val = int32(1)]; fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2436_cast_fp16 = mul(x = x_133_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; bool x_135_interleave_0 = const()[name = string("x_135_interleave_0"), val = bool(false)]; tensor x_135_cast_fp16 = concat(axis = var_2433, interleave = x_135_interleave_0, values = (x_133_cast_fp16, var_2436_cast_fp16))[name = string("x_135_cast_fp16")]; tensor out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor([1])]; fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2446_to_fp16, x = x_135_cast_fp16)[name = string("out_91_cast_fp16")]; tensor layer_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376512)))]; tensor out_93_cast_fp16 = mul(x = out_91_cast_fp16, y = layer_layers_7_post_attention_layernorm_weight_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_2452_split_sizes_0 = const()[name = string("op_2452_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2452_axis_0 = const()[name = string("op_2452_axis_0"), val = int32(1)]; tensor var_2452_cast_fp16_0, tensor var_2452_cast_fp16_1 = split(axis = var_2452_axis_0, split_sizes = var_2452_split_sizes_0, x = out_93_cast_fp16)[name = string("op_2452_cast_fp16")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222380672)))]; tensor input_15_cast_fp16 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = var_2457_to_fp16, x = var_2452_cast_fp16_0)[name = string("input_15_cast_fp16")]; tensor var_2468_cast_fp16 = silu(x = input_15_cast_fp16)[name = string("op_2468_cast_fp16")]; string var_2473_pad_type_0 = const()[name = string("op_2473_pad_type_0"), val = string("valid")]; tensor var_2473_strides_0 = const()[name = string("op_2473_strides_0"), val = tensor([1, 1])]; tensor var_2473_pad_0 = const()[name = string("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2473_dilations_0 = const()[name = string("op_2473_dilations_0"), val = tensor([1, 1])]; int32 var_2473_groups_0 = const()[name = string("op_2473_groups_0"), val = int32(1)]; tensor var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230769344)))]; tensor var_2473_cast_fp16 = conv(dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = var_2456_to_fp16, x = var_2452_cast_fp16_0)[name = string("op_2473_cast_fp16")]; tensor x_141_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2473_cast_fp16)[name = string("x_141_cast_fp16")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239158016)))]; tensor hidden_states_47_cast_fp16 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = var_2455_to_fp16, x = x_141_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor x_143_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("x_143_cast_fp16")]; int32 var_2486 = const()[name = string("op_2486"), val = int32(1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2489_cast_fp16 = mul(x = x_143_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; bool x_145_interleave_0 = const()[name = string("x_145_interleave_0"), val = bool(false)]; tensor x_145_cast_fp16 = concat(axis = var_2486, interleave = x_145_interleave_0, values = (x_143_cast_fp16, var_2489_cast_fp16))[name = string("x_145_cast_fp16")]; tensor out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor([1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_2499_to_fp16, x = x_145_cast_fp16)[name = string("out_97_cast_fp16")]; tensor layer_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247546688)))]; tensor out_99_cast_fp16 = mul(x = out_97_cast_fp16, y = layer_layers_8_input_layernorm_weight_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_2505_split_sizes_0 = const()[name = string("op_2505_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2505_axis_0 = const()[name = string("op_2505_axis_0"), val = int32(1)]; tensor var_2505_cast_fp16_0, tensor var_2505_cast_fp16_1 = split(axis = var_2505_axis_0, split_sizes = var_2505_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2505_cast_fp16")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247550848)))]; tensor query_states_33_cast_fp16 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = var_2527_to_fp16, x = var_2505_cast_fp16_0)[name = string("query_states_33_cast_fp16")]; string key_states_33_pad_type_0 = const()[name = string("key_states_33_pad_type_0"), val = string("valid")]; tensor key_states_33_strides_0 = const()[name = string("key_states_33_strides_0"), val = tensor([1, 1])]; tensor key_states_33_pad_0 = const()[name = string("key_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_33_dilations_0 = const()[name = string("key_states_33_dilations_0"), val = tensor([1, 1])]; int32 key_states_33_groups_0 = const()[name = string("key_states_33_groups_0"), val = int32(1)]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249648064)))]; tensor key_states_33_cast_fp16 = conv(dilations = key_states_33_dilations_0, groups = key_states_33_groups_0, pad = key_states_33_pad_0, pad_type = key_states_33_pad_type_0, strides = key_states_33_strides_0, weight = var_2538_to_fp16, x = var_2505_cast_fp16_0)[name = string("key_states_33_cast_fp16")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249910272)))]; tensor value_states_33_cast_fp16 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = var_2549_to_fp16, x = var_2505_cast_fp16_0)[name = string("value_states_33_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 16, 64, 64])]; tensor embed_33_cast_fp16 = reshape(shape = var_2557, x = query_states_33_cast_fp16)[name = string("embed_33_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 2, 64, 64])]; tensor var_2562_cast_fp16 = reshape(shape = var_2561, x = key_states_33_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor embed_35_perm_0 = const()[name = string("embed_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([1, 2, 64, 64])]; tensor var_2569_cast_fp16 = reshape(shape = var_2568, x = value_states_33_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor value_states_35_perm_0 = const()[name = string("value_states_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2573_cast_fp16 = mul(x = embed_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor var_2574_split_sizes_0 = const()[name = string("op_2574_split_sizes_0"), val = tensor([32, 32])]; int32 var_2574_axis_0 = const()[name = string("op_2574_axis_0"), val = int32(-2)]; tensor var_2574_cast_fp16_0, tensor var_2574_cast_fp16_1 = split(axis = var_2574_axis_0, split_sizes = var_2574_split_sizes_0, x = embed_33_cast_fp16)[name = string("op_2574_cast_fp16")]; fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2576_cast_fp16 = mul(x = var_2574_cast_fp16_1, y = const_87_promoted_to_fp16)[name = string("op_2576_cast_fp16")]; int32 var_2578 = const()[name = string("op_2578"), val = int32(-2)]; bool var_2579_interleave_0 = const()[name = string("op_2579_interleave_0"), val = bool(false)]; tensor var_2579_cast_fp16 = concat(axis = var_2578, interleave = var_2579_interleave_0, values = (var_2576_cast_fp16, var_2574_cast_fp16_0))[name = string("op_2579_cast_fp16")]; tensor var_2580_cast_fp16 = mul(x = var_2579_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2580_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor embed_35_cast_fp16 = transpose(perm = embed_35_perm_0, x = var_2562_cast_fp16)[name = string("transpose_47")]; tensor var_2583_cast_fp16 = mul(x = embed_35_cast_fp16, y = cos_cast_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2584_split_sizes_0 = const()[name = string("op_2584_split_sizes_0"), val = tensor([32, 32])]; int32 var_2584_axis_0 = const()[name = string("op_2584_axis_0"), val = int32(-1)]; tensor var_2584_cast_fp16_0, tensor var_2584_cast_fp16_1 = split(axis = var_2584_axis_0, split_sizes = var_2584_split_sizes_0, x = embed_35_cast_fp16)[name = string("op_2584_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2586_cast_fp16 = mul(x = var_2584_cast_fp16_1, y = const_88_promoted_to_fp16)[name = string("op_2586_cast_fp16")]; int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; tensor var_2589_cast_fp16 = concat(axis = var_2588, interleave = var_2589_interleave_0, values = (var_2586_cast_fp16, var_2584_cast_fp16_0))[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_cast_fp16)[name = string("op_2590_cast_fp16")]; tensor key_states_35_cast_fp16 = add(x = var_2583_cast_fp16, y = var_2590_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([8])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([9])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_81, expand_dims_82, position_id, concat_67_values3_0))[name = string("concat_67")]; tensor concat_68_values1_0 = const()[name = string("concat_68_values1_0"), val = tensor([0])]; tensor concat_68_values3_0 = const()[name = string("concat_68_values3_0"), val = tensor([0])]; int32 concat_68_axis_0 = const()[name = string("concat_68_axis_0"), val = int32(0)]; bool concat_68_interleave_0 = const()[name = string("concat_68_interleave_0"), val = bool(false)]; tensor concat_68 = concat(axis = concat_68_axis_0, interleave = concat_68_interleave_0, values = (expand_dims_84, concat_68_values1_0, var_426, concat_68_values3_0))[name = string("concat_68")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = key_states_35_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_374_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_374")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35_cast_fp16 = transpose(perm = value_states_35_perm_0, x = var_2569_cast_fp16)[name = string("transpose_46")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = value_states_35_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_375_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_375")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_64)[name = string("op_2633_cast_fp16")]; tensor tile_16 = const()[name = string("tile_16"), val = tensor([1, 1])]; int32 var_2636_axis_0 = const()[name = string("op_2636_axis_0"), val = int32(1)]; tensor var_2636_cast_fp16_0, tensor var_2636_cast_fp16_1 = split(axis = var_2636_axis_0, split_sizes = tile_16, x = var_2633_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor var_2643_begin_0 = const()[name = string("op_2643_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2643_end_0 = const()[name = string("op_2643_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2643_end_mask_0 = const()[name = string("op_2643_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = coreml_update_state_65)[name = string("op_2643_cast_fp16")]; tensor tile_17 = const()[name = string("tile_17"), val = tensor([1, 1])]; int32 var_2646_axis_0 = const()[name = string("op_2646_axis_0"), val = int32(1)]; tensor var_2646_cast_fp16_0, tensor var_2646_cast_fp16_1 = split(axis = var_2646_axis_0, split_sizes = tile_17, x = var_2643_cast_fp16)[name = string("op_2646_cast_fp16")]; tensor var_2649_split_sizes_0 = const()[name = string("op_2649_split_sizes_0"), val = tensor([8, 8])]; int32 var_2649_axis_0 = const()[name = string("op_2649_axis_0"), val = int32(1)]; tensor var_2649_cast_fp16_0, tensor var_2649_cast_fp16_1 = split(axis = var_2649_axis_0, split_sizes = var_2649_split_sizes_0, x = query_states_35_cast_fp16)[name = string("op_2649_cast_fp16")]; bool attn_weights_129_transpose_x_0 = const()[name = string("attn_weights_129_transpose_x_0"), val = bool(false)]; bool attn_weights_129_transpose_y_0 = const()[name = string("attn_weights_129_transpose_y_0"), val = bool(false)]; tensor attn_weights_129_cast_fp16 = matmul(transpose_x = attn_weights_129_transpose_x_0, transpose_y = attn_weights_129_transpose_y_0, x = var_2636_cast_fp16_0, y = var_2649_cast_fp16_0)[name = string("attn_weights_129_cast_fp16")]; fp16 _inversed_attn_weights_131_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_131_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_131_cast_fp16 = mul(x = attn_weights_129_cast_fp16, y = _inversed_attn_weights_131_y_0_to_fp16)[name = string("_inversed_attn_weights_131_cast_fp16")]; tensor attn_weights_133_cast_fp16 = add(x = _inversed_attn_weights_131_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_133_cast_fp16")]; int32 var_2656 = const()[name = string("op_2656"), val = int32(2)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_2656, x = attn_weights_133_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool var_2662_transpose_x_1 = const()[name = string("op_2662_transpose_x_1"), val = bool(true)]; bool var_2662_transpose_y_1 = const()[name = string("op_2662_transpose_y_1"), val = bool(false)]; tensor var_2662_cast_fp16 = matmul(transpose_x = var_2662_transpose_x_1, transpose_y = var_2662_transpose_y_1, x = attn_weights_135_cast_fp16, y = var_2646_cast_fp16_0)[name = string("op_2662_cast_fp16")]; bool attn_weights_137_transpose_x_0 = const()[name = string("attn_weights_137_transpose_x_0"), val = bool(false)]; bool attn_weights_137_transpose_y_0 = const()[name = string("attn_weights_137_transpose_y_0"), val = bool(false)]; tensor attn_weights_137_cast_fp16 = matmul(transpose_x = attn_weights_137_transpose_x_0, transpose_y = attn_weights_137_transpose_y_0, x = var_2636_cast_fp16_1, y = var_2649_cast_fp16_1)[name = string("attn_weights_137_cast_fp16")]; fp16 _inversed_attn_weights_139_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_139_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_139_cast_fp16 = mul(x = attn_weights_137_cast_fp16, y = _inversed_attn_weights_139_y_0_to_fp16)[name = string("_inversed_attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = _inversed_attn_weights_139_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; int32 var_2668 = const()[name = string("op_2668"), val = int32(2)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_2668, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(true)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_143_cast_fp16, y = var_2646_cast_fp16_1)[name = string("attn_output_49_cast_fp16")]; int32 var_2676 = const()[name = string("op_2676"), val = int32(1)]; bool attn_output_51_interleave_0 = const()[name = string("attn_output_51_interleave_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = concat(axis = var_2676, interleave = attn_output_51_interleave_0, values = (var_2662_cast_fp16, attn_output_49_cast_fp16))[name = string("attn_output_51_cast_fp16")]; tensor var_2680_perm_0 = const()[name = string("op_2680_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 1024, 1, 64])]; tensor var_2680_cast_fp16 = transpose(perm = var_2680_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_45")]; tensor x_149_cast_fp16 = reshape(shape = var_2685, x = var_2680_cast_fp16)[name = string("x_149_cast_fp16")]; string hidden_states_51_pad_type_0 = const()[name = string("hidden_states_51_pad_type_0"), val = string("valid")]; tensor hidden_states_51_strides_0 = const()[name = string("hidden_states_51_strides_0"), val = tensor([1, 1])]; tensor hidden_states_51_pad_0 = const()[name = string("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_51_dilations_0 = const()[name = string("hidden_states_51_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_51_groups_0 = const()[name = string("hidden_states_51_groups_0"), val = int32(1)]; tensor var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250172480)))]; tensor hidden_states_51_cast_fp16 = conv(dilations = hidden_states_51_dilations_0, groups = hidden_states_51_groups_0, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = hidden_states_51_strides_0, weight = var_2692_to_fp16, x = x_149_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor x_151_cast_fp16 = add(x = x_143_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("x_151_cast_fp16")]; int32 var_2704 = const()[name = string("op_2704"), val = int32(1)]; fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2707_cast_fp16 = mul(x = x_151_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_2707_cast_fp16")]; bool x_153_interleave_0 = const()[name = string("x_153_interleave_0"), val = bool(false)]; tensor x_153_cast_fp16 = concat(axis = var_2704, interleave = x_153_interleave_0, values = (x_151_cast_fp16, var_2707_cast_fp16))[name = string("x_153_cast_fp16")]; tensor out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor([1])]; fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_2717_to_fp16, x = x_153_cast_fp16)[name = string("out_103_cast_fp16")]; tensor layer_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252269696)))]; tensor out_105_cast_fp16 = mul(x = out_103_cast_fp16, y = layer_layers_8_post_attention_layernorm_weight_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2723_split_sizes_0 = const()[name = string("op_2723_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2723_axis_0 = const()[name = string("op_2723_axis_0"), val = int32(1)]; tensor var_2723_cast_fp16_0, tensor var_2723_cast_fp16_1 = split(axis = var_2723_axis_0, split_sizes = var_2723_split_sizes_0, x = out_105_cast_fp16)[name = string("op_2723_cast_fp16")]; string input_17_pad_type_0 = const()[name = string("input_17_pad_type_0"), val = string("valid")]; tensor input_17_strides_0 = const()[name = string("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = string("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = string("input_17_dilations_0"), val = tensor([1, 1])]; int32 input_17_groups_0 = const()[name = string("input_17_groups_0"), val = int32(1)]; tensor var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252273856)))]; tensor input_17_cast_fp16 = conv(dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = var_2728_to_fp16, x = var_2723_cast_fp16_0)[name = string("input_17_cast_fp16")]; tensor var_2739_cast_fp16 = silu(x = input_17_cast_fp16)[name = string("op_2739_cast_fp16")]; string var_2744_pad_type_0 = const()[name = string("op_2744_pad_type_0"), val = string("valid")]; tensor var_2744_strides_0 = const()[name = string("op_2744_strides_0"), val = tensor([1, 1])]; tensor var_2744_pad_0 = const()[name = string("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2744_dilations_0 = const()[name = string("op_2744_dilations_0"), val = tensor([1, 1])]; int32 var_2744_groups_0 = const()[name = string("op_2744_groups_0"), val = int32(1)]; tensor var_2727_to_fp16 = const()[name = string("op_2727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260662528)))]; tensor var_2744_cast_fp16 = conv(dilations = var_2744_dilations_0, groups = var_2744_groups_0, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2744_strides_0, weight = var_2727_to_fp16, x = var_2723_cast_fp16_0)[name = string("op_2744_cast_fp16")]; tensor x_159_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2744_cast_fp16)[name = string("x_159_cast_fp16")]; string hidden_states_53_pad_type_0 = const()[name = string("hidden_states_53_pad_type_0"), val = string("valid")]; tensor hidden_states_53_strides_0 = const()[name = string("hidden_states_53_strides_0"), val = tensor([1, 1])]; tensor hidden_states_53_pad_0 = const()[name = string("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_53_dilations_0 = const()[name = string("hidden_states_53_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_53_groups_0 = const()[name = string("hidden_states_53_groups_0"), val = int32(1)]; tensor var_2726_to_fp16 = const()[name = string("op_2726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269051200)))]; tensor hidden_states_53_cast_fp16 = conv(dilations = hidden_states_53_dilations_0, groups = hidden_states_53_groups_0, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = hidden_states_53_strides_0, weight = var_2726_to_fp16, x = x_159_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_151_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_2757 = const()[name = string("op_2757"), val = int32(1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2760_cast_fp16 = mul(x = x_161_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_2760_cast_fp16")]; bool x_163_interleave_0 = const()[name = string("x_163_interleave_0"), val = bool(false)]; tensor x_163_cast_fp16 = concat(axis = var_2757, interleave = x_163_interleave_0, values = (x_161_cast_fp16, var_2760_cast_fp16))[name = string("x_163_cast_fp16")]; tensor out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor([1])]; fp16 var_2770_to_fp16 = const()[name = string("op_2770_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_2770_to_fp16, x = x_163_cast_fp16)[name = string("out_109_cast_fp16")]; tensor layer_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277439872)))]; tensor out_111_cast_fp16 = mul(x = out_109_cast_fp16, y = layer_layers_9_input_layernorm_weight_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_2776_split_sizes_0 = const()[name = string("op_2776_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2776_axis_0 = const()[name = string("op_2776_axis_0"), val = int32(1)]; tensor var_2776_cast_fp16_0, tensor var_2776_cast_fp16_1 = split(axis = var_2776_axis_0, split_sizes = var_2776_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2776_cast_fp16")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277444032)))]; tensor query_states_37_cast_fp16 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = var_2798_to_fp16, x = var_2776_cast_fp16_0)[name = string("query_states_37_cast_fp16")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279541248)))]; tensor key_states_37_cast_fp16 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = var_2809_to_fp16, x = var_2776_cast_fp16_0)[name = string("key_states_37_cast_fp16")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor var_2820_to_fp16 = const()[name = string("op_2820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279803456)))]; tensor value_states_37_cast_fp16 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = var_2820_to_fp16, x = var_2776_cast_fp16_0)[name = string("value_states_37_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 16, 64, 64])]; tensor embed_37_cast_fp16 = reshape(shape = var_2828, x = query_states_37_cast_fp16)[name = string("embed_37_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 64, 64])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = key_states_37_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor embed_39_perm_0 = const()[name = string("embed_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([1, 2, 64, 64])]; tensor var_2840_cast_fp16 = reshape(shape = var_2839, x = value_states_37_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor value_states_39_perm_0 = const()[name = string("value_states_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2844_cast_fp16 = mul(x = embed_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor var_2845_split_sizes_0 = const()[name = string("op_2845_split_sizes_0"), val = tensor([32, 32])]; int32 var_2845_axis_0 = const()[name = string("op_2845_axis_0"), val = int32(-2)]; tensor var_2845_cast_fp16_0, tensor var_2845_cast_fp16_1 = split(axis = var_2845_axis_0, split_sizes = var_2845_split_sizes_0, x = embed_37_cast_fp16)[name = string("op_2845_cast_fp16")]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = var_2845_cast_fp16_1, y = const_97_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; int32 var_2849 = const()[name = string("op_2849"), val = int32(-2)]; bool var_2850_interleave_0 = const()[name = string("op_2850_interleave_0"), val = bool(false)]; tensor var_2850_cast_fp16 = concat(axis = var_2849, interleave = var_2850_interleave_0, values = (var_2847_cast_fp16, var_2845_cast_fp16_0))[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = mul(x = var_2850_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2851_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor embed_39_cast_fp16 = transpose(perm = embed_39_perm_0, x = var_2833_cast_fp16)[name = string("transpose_44")]; tensor var_2854_cast_fp16 = mul(x = embed_39_cast_fp16, y = cos_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2855_split_sizes_0 = const()[name = string("op_2855_split_sizes_0"), val = tensor([32, 32])]; int32 var_2855_axis_0 = const()[name = string("op_2855_axis_0"), val = int32(-1)]; tensor var_2855_cast_fp16_0, tensor var_2855_cast_fp16_1 = split(axis = var_2855_axis_0, split_sizes = var_2855_split_sizes_0, x = embed_39_cast_fp16)[name = string("op_2855_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2857_cast_fp16 = mul(x = var_2855_cast_fp16_1, y = const_98_promoted_to_fp16)[name = string("op_2857_cast_fp16")]; int32 var_2859 = const()[name = string("op_2859"), val = int32(-1)]; bool var_2860_interleave_0 = const()[name = string("op_2860_interleave_0"), val = bool(false)]; tensor var_2860_cast_fp16 = concat(axis = var_2859, interleave = var_2860_interleave_0, values = (var_2857_cast_fp16, var_2855_cast_fp16_0))[name = string("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = mul(x = var_2860_cast_fp16, y = sin_cast_fp16)[name = string("op_2861_cast_fp16")]; tensor key_states_39_cast_fp16 = add(x = var_2854_cast_fp16, y = var_2861_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([9])]; tensor expand_dims_92 = const()[name = string("expand_dims_92"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([10])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_91, expand_dims_92, position_id, concat_75_values3_0))[name = string("concat_75")]; tensor concat_76_values1_0 = const()[name = string("concat_76_values1_0"), val = tensor([0])]; tensor concat_76_values3_0 = const()[name = string("concat_76_values3_0"), val = tensor([0])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_94, concat_76_values1_0, var_426, concat_76_values3_0))[name = string("concat_76")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = key_states_39_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_376_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_376")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39_cast_fp16 = transpose(perm = value_states_39_perm_0, x = var_2840_cast_fp16)[name = string("transpose_43")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = value_states_39_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_377_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_377")]; tensor var_2904_begin_0 = const()[name = string("op_2904_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = string("op_2904_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2904_end_mask_0 = const()[name = string("op_2904_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = coreml_update_state_66)[name = string("op_2904_cast_fp16")]; tensor tile_18 = const()[name = string("tile_18"), val = tensor([1, 1])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = tile_18, x = var_2904_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2914_begin_0 = const()[name = string("op_2914_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = string("op_2914_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2914_end_mask_0 = const()[name = string("op_2914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = coreml_update_state_67)[name = string("op_2914_cast_fp16")]; tensor tile_19 = const()[name = string("tile_19"), val = tensor([1, 1])]; int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(1)]; tensor var_2917_cast_fp16_0, tensor var_2917_cast_fp16_1 = split(axis = var_2917_axis_0, split_sizes = tile_19, x = var_2914_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor var_2920_split_sizes_0 = const()[name = string("op_2920_split_sizes_0"), val = tensor([8, 8])]; int32 var_2920_axis_0 = const()[name = string("op_2920_axis_0"), val = int32(1)]; tensor var_2920_cast_fp16_0, tensor var_2920_cast_fp16_1 = split(axis = var_2920_axis_0, split_sizes = var_2920_split_sizes_0, x = query_states_39_cast_fp16)[name = string("op_2920_cast_fp16")]; bool attn_weights_145_transpose_x_0 = const()[name = string("attn_weights_145_transpose_x_0"), val = bool(false)]; bool attn_weights_145_transpose_y_0 = const()[name = string("attn_weights_145_transpose_y_0"), val = bool(false)]; tensor attn_weights_145_cast_fp16 = matmul(transpose_x = attn_weights_145_transpose_x_0, transpose_y = attn_weights_145_transpose_y_0, x = var_2907_cast_fp16_0, y = var_2920_cast_fp16_0)[name = string("attn_weights_145_cast_fp16")]; fp16 _inversed_attn_weights_147_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_147_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_147_cast_fp16 = mul(x = attn_weights_145_cast_fp16, y = _inversed_attn_weights_147_y_0_to_fp16)[name = string("_inversed_attn_weights_147_cast_fp16")]; tensor attn_weights_149_cast_fp16 = add(x = _inversed_attn_weights_147_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; int32 var_2927 = const()[name = string("op_2927"), val = int32(2)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_2927, x = attn_weights_149_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool var_2933_transpose_x_1 = const()[name = string("op_2933_transpose_x_1"), val = bool(true)]; bool var_2933_transpose_y_1 = const()[name = string("op_2933_transpose_y_1"), val = bool(false)]; tensor var_2933_cast_fp16 = matmul(transpose_x = var_2933_transpose_x_1, transpose_y = var_2933_transpose_y_1, x = attn_weights_151_cast_fp16, y = var_2917_cast_fp16_0)[name = string("op_2933_cast_fp16")]; bool attn_weights_153_transpose_x_0 = const()[name = string("attn_weights_153_transpose_x_0"), val = bool(false)]; bool attn_weights_153_transpose_y_0 = const()[name = string("attn_weights_153_transpose_y_0"), val = bool(false)]; tensor attn_weights_153_cast_fp16 = matmul(transpose_x = attn_weights_153_transpose_x_0, transpose_y = attn_weights_153_transpose_y_0, x = var_2907_cast_fp16_1, y = var_2920_cast_fp16_1)[name = string("attn_weights_153_cast_fp16")]; fp16 _inversed_attn_weights_155_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_155_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_155_cast_fp16 = mul(x = attn_weights_153_cast_fp16, y = _inversed_attn_weights_155_y_0_to_fp16)[name = string("_inversed_attn_weights_155_cast_fp16")]; tensor attn_weights_157_cast_fp16 = add(x = _inversed_attn_weights_155_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_157_cast_fp16")]; int32 var_2939 = const()[name = string("op_2939"), val = int32(2)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_2939, x = attn_weights_157_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_55_transpose_x_1 = const()[name = string("attn_output_55_transpose_x_1"), val = bool(true)]; bool attn_output_55_transpose_y_1 = const()[name = string("attn_output_55_transpose_y_1"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_1, transpose_y = attn_output_55_transpose_y_1, x = attn_weights_159_cast_fp16, y = var_2917_cast_fp16_1)[name = string("attn_output_55_cast_fp16")]; int32 var_2947 = const()[name = string("op_2947"), val = int32(1)]; bool attn_output_57_interleave_0 = const()[name = string("attn_output_57_interleave_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = concat(axis = var_2947, interleave = attn_output_57_interleave_0, values = (var_2933_cast_fp16, attn_output_55_cast_fp16))[name = string("attn_output_57_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1024, 1, 64])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; tensor x_167_cast_fp16 = reshape(shape = var_2956, x = var_2951_cast_fp16)[name = string("x_167_cast_fp16")]; string hidden_states_57_pad_type_0 = const()[name = string("hidden_states_57_pad_type_0"), val = string("valid")]; tensor hidden_states_57_strides_0 = const()[name = string("hidden_states_57_strides_0"), val = tensor([1, 1])]; tensor hidden_states_57_pad_0 = const()[name = string("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_57_dilations_0 = const()[name = string("hidden_states_57_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_57_groups_0 = const()[name = string("hidden_states_57_groups_0"), val = int32(1)]; tensor var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280065664)))]; tensor hidden_states_57_cast_fp16 = conv(dilations = hidden_states_57_dilations_0, groups = hidden_states_57_groups_0, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = hidden_states_57_strides_0, weight = var_2963_to_fp16, x = x_167_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor x_169_cast_fp16 = add(x = x_161_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("x_169_cast_fp16")]; int32 var_2975 = const()[name = string("op_2975"), val = int32(1)]; fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2978_cast_fp16 = mul(x = x_169_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; bool x_171_interleave_0 = const()[name = string("x_171_interleave_0"), val = bool(false)]; tensor x_171_cast_fp16 = concat(axis = var_2975, interleave = x_171_interleave_0, values = (x_169_cast_fp16, var_2978_cast_fp16))[name = string("x_171_cast_fp16")]; tensor out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor([1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_2988_to_fp16, x = x_171_cast_fp16)[name = string("out_115_cast_fp16")]; tensor layer_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282162880)))]; tensor out_117_cast_fp16 = mul(x = out_115_cast_fp16, y = layer_layers_9_post_attention_layernorm_weight_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2994_split_sizes_0 = const()[name = string("op_2994_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2994_axis_0 = const()[name = string("op_2994_axis_0"), val = int32(1)]; tensor var_2994_cast_fp16_0, tensor var_2994_cast_fp16_1 = split(axis = var_2994_axis_0, split_sizes = var_2994_split_sizes_0, x = out_117_cast_fp16)[name = string("op_2994_cast_fp16")]; string input_19_pad_type_0 = const()[name = string("input_19_pad_type_0"), val = string("valid")]; tensor input_19_strides_0 = const()[name = string("input_19_strides_0"), val = tensor([1, 1])]; tensor input_19_pad_0 = const()[name = string("input_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_19_dilations_0 = const()[name = string("input_19_dilations_0"), val = tensor([1, 1])]; int32 input_19_groups_0 = const()[name = string("input_19_groups_0"), val = int32(1)]; tensor var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282167040)))]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = var_2999_to_fp16, x = var_2994_cast_fp16_0)[name = string("input_19_cast_fp16")]; tensor var_3010_cast_fp16 = silu(x = input_19_cast_fp16)[name = string("op_3010_cast_fp16")]; string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1, 1])]; tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1, 1])]; int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; tensor var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290555712)))]; tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = var_2998_to_fp16, x = var_2994_cast_fp16_0)[name = string("op_3015_cast_fp16")]; tensor x_177_cast_fp16 = mul(x = var_3010_cast_fp16, y = var_3015_cast_fp16)[name = string("x_177_cast_fp16")]; string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; tensor var_2997_to_fp16 = const()[name = string("op_2997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944384)))]; tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = var_2997_to_fp16, x = x_177_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor x_179_cast_fp16 = add(x = x_169_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("x_179_cast_fp16")]; int32 var_3028 = const()[name = string("op_3028"), val = int32(1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3031_cast_fp16 = mul(x = x_179_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3031_cast_fp16")]; bool x_181_interleave_0 = const()[name = string("x_181_interleave_0"), val = bool(false)]; tensor x_181_cast_fp16 = concat(axis = var_3028, interleave = x_181_interleave_0, values = (x_179_cast_fp16, var_3031_cast_fp16))[name = string("x_181_cast_fp16")]; tensor out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor([1])]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_3041_to_fp16, x = x_181_cast_fp16)[name = string("out_121_cast_fp16")]; tensor layer_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307333056)))]; tensor out_123_cast_fp16 = mul(x = out_121_cast_fp16, y = layer_layers_10_input_layernorm_weight_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_3047_split_sizes_0 = const()[name = string("op_3047_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3047_axis_0 = const()[name = string("op_3047_axis_0"), val = int32(1)]; tensor var_3047_cast_fp16_0, tensor var_3047_cast_fp16_1 = split(axis = var_3047_axis_0, split_sizes = var_3047_split_sizes_0, x = out_123_cast_fp16)[name = string("op_3047_cast_fp16")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307337216)))]; tensor query_states_41_cast_fp16 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = var_3069_to_fp16, x = var_3047_cast_fp16_0)[name = string("query_states_41_cast_fp16")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309434432)))]; tensor key_states_41_cast_fp16 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = var_3080_to_fp16, x = var_3047_cast_fp16_0)[name = string("key_states_41_cast_fp16")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309696640)))]; tensor value_states_41_cast_fp16 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = var_3091_to_fp16, x = var_3047_cast_fp16_0)[name = string("value_states_41_cast_fp16")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([1, 16, 64, 64])]; tensor embed_41_cast_fp16 = reshape(shape = var_3099, x = query_states_41_cast_fp16)[name = string("embed_41_cast_fp16")]; tensor var_3103 = const()[name = string("op_3103"), val = tensor([1, 2, 64, 64])]; tensor var_3104_cast_fp16 = reshape(shape = var_3103, x = key_states_41_cast_fp16)[name = string("op_3104_cast_fp16")]; tensor embed_43_perm_0 = const()[name = string("embed_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 2, 64, 64])]; tensor var_3111_cast_fp16 = reshape(shape = var_3110, x = value_states_41_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor value_states_43_perm_0 = const()[name = string("value_states_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3115_cast_fp16 = mul(x = embed_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3115_cast_fp16")]; tensor var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor([32, 32])]; int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-2)]; tensor var_3116_cast_fp16_0, tensor var_3116_cast_fp16_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = embed_41_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3118_cast_fp16 = mul(x = var_3116_cast_fp16_1, y = const_107_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; int32 var_3120 = const()[name = string("op_3120"), val = int32(-2)]; bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)]; tensor var_3121_cast_fp16 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118_cast_fp16, var_3116_cast_fp16_0))[name = string("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = mul(x = var_3121_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3122_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor embed_43_cast_fp16 = transpose(perm = embed_43_perm_0, x = var_3104_cast_fp16)[name = string("transpose_41")]; tensor var_3125_cast_fp16 = mul(x = embed_43_cast_fp16, y = cos_cast_fp16)[name = string("op_3125_cast_fp16")]; tensor var_3126_split_sizes_0 = const()[name = string("op_3126_split_sizes_0"), val = tensor([32, 32])]; int32 var_3126_axis_0 = const()[name = string("op_3126_axis_0"), val = int32(-1)]; tensor var_3126_cast_fp16_0, tensor var_3126_cast_fp16_1 = split(axis = var_3126_axis_0, split_sizes = var_3126_split_sizes_0, x = embed_43_cast_fp16)[name = string("op_3126_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3128_cast_fp16 = mul(x = var_3126_cast_fp16_1, y = const_108_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; bool var_3131_interleave_0 = const()[name = string("op_3131_interleave_0"), val = bool(false)]; tensor var_3131_cast_fp16 = concat(axis = var_3130, interleave = var_3131_interleave_0, values = (var_3128_cast_fp16, var_3126_cast_fp16_0))[name = string("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = mul(x = var_3131_cast_fp16, y = sin_cast_fp16)[name = string("op_3132_cast_fp16")]; tensor key_states_43_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3132_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor expand_dims_101 = const()[name = string("expand_dims_101"), val = tensor([10])]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([0])]; tensor expand_dims_104 = const()[name = string("expand_dims_104"), val = tensor([11])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_101, expand_dims_102, position_id, concat_83_values3_0))[name = string("concat_83")]; tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (expand_dims_104, concat_84_values1_0, var_426, concat_84_values3_0))[name = string("concat_84")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_378_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_378")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43_cast_fp16 = transpose(perm = value_states_43_perm_0, x = var_3111_cast_fp16)[name = string("transpose_40")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = value_states_43_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_379_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_379")]; tensor var_3175_begin_0 = const()[name = string("op_3175_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3175_end_0 = const()[name = string("op_3175_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3175_end_mask_0 = const()[name = string("op_3175_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3175_cast_fp16 = slice_by_index(begin = var_3175_begin_0, end = var_3175_end_0, end_mask = var_3175_end_mask_0, x = coreml_update_state_68)[name = string("op_3175_cast_fp16")]; tensor tile_20 = const()[name = string("tile_20"), val = tensor([1, 1])]; int32 var_3178_axis_0 = const()[name = string("op_3178_axis_0"), val = int32(1)]; tensor var_3178_cast_fp16_0, tensor var_3178_cast_fp16_1 = split(axis = var_3178_axis_0, split_sizes = tile_20, x = var_3175_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor var_3185_begin_0 = const()[name = string("op_3185_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3185_end_0 = const()[name = string("op_3185_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3185_end_mask_0 = const()[name = string("op_3185_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = coreml_update_state_69)[name = string("op_3185_cast_fp16")]; tensor tile_21 = const()[name = string("tile_21"), val = tensor([1, 1])]; int32 var_3188_axis_0 = const()[name = string("op_3188_axis_0"), val = int32(1)]; tensor var_3188_cast_fp16_0, tensor var_3188_cast_fp16_1 = split(axis = var_3188_axis_0, split_sizes = tile_21, x = var_3185_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor var_3191_split_sizes_0 = const()[name = string("op_3191_split_sizes_0"), val = tensor([8, 8])]; int32 var_3191_axis_0 = const()[name = string("op_3191_axis_0"), val = int32(1)]; tensor var_3191_cast_fp16_0, tensor var_3191_cast_fp16_1 = split(axis = var_3191_axis_0, split_sizes = var_3191_split_sizes_0, x = query_states_43_cast_fp16)[name = string("op_3191_cast_fp16")]; bool attn_weights_161_transpose_x_0 = const()[name = string("attn_weights_161_transpose_x_0"), val = bool(false)]; bool attn_weights_161_transpose_y_0 = const()[name = string("attn_weights_161_transpose_y_0"), val = bool(false)]; tensor attn_weights_161_cast_fp16 = matmul(transpose_x = attn_weights_161_transpose_x_0, transpose_y = attn_weights_161_transpose_y_0, x = var_3178_cast_fp16_0, y = var_3191_cast_fp16_0)[name = string("attn_weights_161_cast_fp16")]; fp16 _inversed_attn_weights_163_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_163_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_163_cast_fp16 = mul(x = attn_weights_161_cast_fp16, y = _inversed_attn_weights_163_y_0_to_fp16)[name = string("_inversed_attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = _inversed_attn_weights_163_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_165_cast_fp16")]; int32 var_3198 = const()[name = string("op_3198"), val = int32(2)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_3198, x = attn_weights_165_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool var_3204_transpose_x_1 = const()[name = string("op_3204_transpose_x_1"), val = bool(true)]; bool var_3204_transpose_y_1 = const()[name = string("op_3204_transpose_y_1"), val = bool(false)]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_1, transpose_y = var_3204_transpose_y_1, x = attn_weights_167_cast_fp16, y = var_3188_cast_fp16_0)[name = string("op_3204_cast_fp16")]; bool attn_weights_169_transpose_x_0 = const()[name = string("attn_weights_169_transpose_x_0"), val = bool(false)]; bool attn_weights_169_transpose_y_0 = const()[name = string("attn_weights_169_transpose_y_0"), val = bool(false)]; tensor attn_weights_169_cast_fp16 = matmul(transpose_x = attn_weights_169_transpose_x_0, transpose_y = attn_weights_169_transpose_y_0, x = var_3178_cast_fp16_1, y = var_3191_cast_fp16_1)[name = string("attn_weights_169_cast_fp16")]; fp16 _inversed_attn_weights_171_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_171_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_171_cast_fp16 = mul(x = attn_weights_169_cast_fp16, y = _inversed_attn_weights_171_y_0_to_fp16)[name = string("_inversed_attn_weights_171_cast_fp16")]; tensor attn_weights_173_cast_fp16 = add(x = _inversed_attn_weights_171_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_173_cast_fp16")]; int32 var_3210 = const()[name = string("op_3210"), val = int32(2)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_3210, x = attn_weights_173_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(true)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_175_cast_fp16, y = var_3188_cast_fp16_1)[name = string("attn_output_61_cast_fp16")]; int32 var_3218 = const()[name = string("op_3218"), val = int32(1)]; bool attn_output_63_interleave_0 = const()[name = string("attn_output_63_interleave_0"), val = bool(false)]; tensor attn_output_63_cast_fp16 = concat(axis = var_3218, interleave = attn_output_63_interleave_0, values = (var_3204_cast_fp16, attn_output_61_cast_fp16))[name = string("attn_output_63_cast_fp16")]; tensor var_3222_perm_0 = const()[name = string("op_3222_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([1, 1024, 1, 64])]; tensor var_3222_cast_fp16 = transpose(perm = var_3222_perm_0, x = attn_output_63_cast_fp16)[name = string("transpose_39")]; tensor x_185_cast_fp16 = reshape(shape = var_3227, x = var_3222_cast_fp16)[name = string("x_185_cast_fp16")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309958848)))]; tensor hidden_states_63_cast_fp16 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = var_3234_to_fp16, x = x_185_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_179_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(1)]; fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3249_cast_fp16 = mul(x = x_187_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_3249_cast_fp16")]; bool x_189_interleave_0 = const()[name = string("x_189_interleave_0"), val = bool(false)]; tensor x_189_cast_fp16 = concat(axis = var_3246, interleave = x_189_interleave_0, values = (x_187_cast_fp16, var_3249_cast_fp16))[name = string("x_189_cast_fp16")]; tensor out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor([1])]; fp16 var_3259_to_fp16 = const()[name = string("op_3259_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_3259_to_fp16, x = x_189_cast_fp16)[name = string("out_127_cast_fp16")]; tensor layer_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312056064)))]; tensor out_129_cast_fp16 = mul(x = out_127_cast_fp16, y = layer_layers_10_post_attention_layernorm_weight_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_3265_split_sizes_0 = const()[name = string("op_3265_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3265_axis_0 = const()[name = string("op_3265_axis_0"), val = int32(1)]; tensor var_3265_cast_fp16_0, tensor var_3265_cast_fp16_1 = split(axis = var_3265_axis_0, split_sizes = var_3265_split_sizes_0, x = out_129_cast_fp16)[name = string("op_3265_cast_fp16")]; string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312060224)))]; tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = var_3270_to_fp16, x = var_3265_cast_fp16_0)[name = string("input_21_cast_fp16")]; tensor var_3281_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_3281_cast_fp16")]; string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")]; tensor var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor([1, 1])]; tensor var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor([1, 1])]; int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)]; tensor var_3269_to_fp16 = const()[name = string("op_3269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320448896)))]; tensor var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = var_3269_to_fp16, x = var_3265_cast_fp16_0)[name = string("op_3286_cast_fp16")]; tensor x_195_cast_fp16 = mul(x = var_3281_cast_fp16, y = var_3286_cast_fp16)[name = string("x_195_cast_fp16")]; string hidden_states_65_pad_type_0 = const()[name = string("hidden_states_65_pad_type_0"), val = string("valid")]; tensor hidden_states_65_strides_0 = const()[name = string("hidden_states_65_strides_0"), val = tensor([1, 1])]; tensor hidden_states_65_pad_0 = const()[name = string("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_65_dilations_0 = const()[name = string("hidden_states_65_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_65_groups_0 = const()[name = string("hidden_states_65_groups_0"), val = int32(1)]; tensor var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328837568)))]; tensor hidden_states_65_cast_fp16 = conv(dilations = hidden_states_65_dilations_0, groups = hidden_states_65_groups_0, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = hidden_states_65_strides_0, weight = var_3268_to_fp16, x = x_195_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor x_197_cast_fp16 = add(x = x_187_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("x_197_cast_fp16")]; int32 var_3299 = const()[name = string("op_3299"), val = int32(1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3302_cast_fp16 = mul(x = x_197_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3302_cast_fp16")]; bool x_199_interleave_0 = const()[name = string("x_199_interleave_0"), val = bool(false)]; tensor x_199_cast_fp16 = concat(axis = var_3299, interleave = x_199_interleave_0, values = (x_197_cast_fp16, var_3302_cast_fp16))[name = string("x_199_cast_fp16")]; tensor out_133_axes_0 = const()[name = string("out_133_axes_0"), val = tensor([1])]; fp16 var_3312_to_fp16 = const()[name = string("op_3312_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_3312_to_fp16, x = x_199_cast_fp16)[name = string("out_133_cast_fp16")]; tensor layer_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337226240)))]; tensor out_135_cast_fp16 = mul(x = out_133_cast_fp16, y = layer_layers_11_input_layernorm_weight_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_3318_split_sizes_0 = const()[name = string("op_3318_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3318_axis_0 = const()[name = string("op_3318_axis_0"), val = int32(1)]; tensor var_3318_cast_fp16_0, tensor var_3318_cast_fp16_1 = split(axis = var_3318_axis_0, split_sizes = var_3318_split_sizes_0, x = out_135_cast_fp16)[name = string("op_3318_cast_fp16")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337230400)))]; tensor query_states_45_cast_fp16 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = var_3340_to_fp16, x = var_3318_cast_fp16_0)[name = string("query_states_45_cast_fp16")]; string key_states_45_pad_type_0 = const()[name = string("key_states_45_pad_type_0"), val = string("valid")]; tensor key_states_45_strides_0 = const()[name = string("key_states_45_strides_0"), val = tensor([1, 1])]; tensor key_states_45_pad_0 = const()[name = string("key_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_45_dilations_0 = const()[name = string("key_states_45_dilations_0"), val = tensor([1, 1])]; int32 key_states_45_groups_0 = const()[name = string("key_states_45_groups_0"), val = int32(1)]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339327616)))]; tensor key_states_45_cast_fp16 = conv(dilations = key_states_45_dilations_0, groups = key_states_45_groups_0, pad = key_states_45_pad_0, pad_type = key_states_45_pad_type_0, strides = key_states_45_strides_0, weight = var_3351_to_fp16, x = var_3318_cast_fp16_0)[name = string("key_states_45_cast_fp16")]; string value_states_45_pad_type_0 = const()[name = string("value_states_45_pad_type_0"), val = string("valid")]; tensor value_states_45_strides_0 = const()[name = string("value_states_45_strides_0"), val = tensor([1, 1])]; tensor value_states_45_pad_0 = const()[name = string("value_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_45_dilations_0 = const()[name = string("value_states_45_dilations_0"), val = tensor([1, 1])]; int32 value_states_45_groups_0 = const()[name = string("value_states_45_groups_0"), val = int32(1)]; tensor var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339589824)))]; tensor value_states_45_cast_fp16 = conv(dilations = value_states_45_dilations_0, groups = value_states_45_groups_0, pad = value_states_45_pad_0, pad_type = value_states_45_pad_type_0, strides = value_states_45_strides_0, weight = var_3362_to_fp16, x = var_3318_cast_fp16_0)[name = string("value_states_45_cast_fp16")]; tensor var_3370 = const()[name = string("op_3370"), val = tensor([1, 16, 64, 64])]; tensor embed_45_cast_fp16 = reshape(shape = var_3370, x = query_states_45_cast_fp16)[name = string("embed_45_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 2, 64, 64])]; tensor var_3375_cast_fp16 = reshape(shape = var_3374, x = key_states_45_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor embed_47_perm_0 = const()[name = string("embed_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 64, 64])]; tensor var_3382_cast_fp16 = reshape(shape = var_3381, x = value_states_45_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor value_states_47_perm_0 = const()[name = string("value_states_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3386_cast_fp16 = mul(x = embed_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3386_cast_fp16")]; tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([32, 32])]; int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-2)]; tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = embed_45_cast_fp16)[name = string("op_3387_cast_fp16")]; fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3389_cast_fp16 = mul(x = var_3387_cast_fp16_1, y = const_117_promoted_to_fp16)[name = string("op_3389_cast_fp16")]; int32 var_3391 = const()[name = string("op_3391"), val = int32(-2)]; bool var_3392_interleave_0 = const()[name = string("op_3392_interleave_0"), val = bool(false)]; tensor var_3392_cast_fp16 = concat(axis = var_3391, interleave = var_3392_interleave_0, values = (var_3389_cast_fp16, var_3387_cast_fp16_0))[name = string("op_3392_cast_fp16")]; tensor var_3393_cast_fp16 = mul(x = var_3392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3393_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_3386_cast_fp16, y = var_3393_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor embed_47_cast_fp16 = transpose(perm = embed_47_perm_0, x = var_3375_cast_fp16)[name = string("transpose_38")]; tensor var_3396_cast_fp16 = mul(x = embed_47_cast_fp16, y = cos_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397_split_sizes_0 = const()[name = string("op_3397_split_sizes_0"), val = tensor([32, 32])]; int32 var_3397_axis_0 = const()[name = string("op_3397_axis_0"), val = int32(-1)]; tensor var_3397_cast_fp16_0, tensor var_3397_cast_fp16_1 = split(axis = var_3397_axis_0, split_sizes = var_3397_split_sizes_0, x = embed_47_cast_fp16)[name = string("op_3397_cast_fp16")]; fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3399_cast_fp16 = mul(x = var_3397_cast_fp16_1, y = const_118_promoted_to_fp16)[name = string("op_3399_cast_fp16")]; int32 var_3401 = const()[name = string("op_3401"), val = int32(-1)]; bool var_3402_interleave_0 = const()[name = string("op_3402_interleave_0"), val = bool(false)]; tensor var_3402_cast_fp16 = concat(axis = var_3401, interleave = var_3402_interleave_0, values = (var_3399_cast_fp16, var_3397_cast_fp16_0))[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = mul(x = var_3402_cast_fp16, y = sin_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor key_states_47_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3403_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([11])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([12])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_111, expand_dims_112, position_id, concat_91_values3_0))[name = string("concat_91")]; tensor concat_92_values1_0 = const()[name = string("concat_92_values1_0"), val = tensor([0])]; tensor concat_92_values3_0 = const()[name = string("concat_92_values3_0"), val = tensor([0])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_114, concat_92_values1_0, var_426, concat_92_values3_0))[name = string("concat_92")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = key_states_47_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_380_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_380")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_47_cast_fp16 = transpose(perm = value_states_47_perm_0, x = var_3382_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = value_states_47_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_381_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_381")]; tensor var_3446_begin_0 = const()[name = string("op_3446_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3446_end_0 = const()[name = string("op_3446_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3446_end_mask_0 = const()[name = string("op_3446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3446_cast_fp16 = slice_by_index(begin = var_3446_begin_0, end = var_3446_end_0, end_mask = var_3446_end_mask_0, x = coreml_update_state_70)[name = string("op_3446_cast_fp16")]; tensor tile_22 = const()[name = string("tile_22"), val = tensor([1, 1])]; int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(1)]; tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = tile_22, x = var_3446_cast_fp16)[name = string("op_3449_cast_fp16")]; tensor var_3456_begin_0 = const()[name = string("op_3456_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3456_end_0 = const()[name = string("op_3456_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3456_end_mask_0 = const()[name = string("op_3456_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = coreml_update_state_71)[name = string("op_3456_cast_fp16")]; tensor tile_23 = const()[name = string("tile_23"), val = tensor([1, 1])]; int32 var_3459_axis_0 = const()[name = string("op_3459_axis_0"), val = int32(1)]; tensor var_3459_cast_fp16_0, tensor var_3459_cast_fp16_1 = split(axis = var_3459_axis_0, split_sizes = tile_23, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3462_split_sizes_0 = const()[name = string("op_3462_split_sizes_0"), val = tensor([8, 8])]; int32 var_3462_axis_0 = const()[name = string("op_3462_axis_0"), val = int32(1)]; tensor var_3462_cast_fp16_0, tensor var_3462_cast_fp16_1 = split(axis = var_3462_axis_0, split_sizes = var_3462_split_sizes_0, x = query_states_47_cast_fp16)[name = string("op_3462_cast_fp16")]; bool attn_weights_177_transpose_x_0 = const()[name = string("attn_weights_177_transpose_x_0"), val = bool(false)]; bool attn_weights_177_transpose_y_0 = const()[name = string("attn_weights_177_transpose_y_0"), val = bool(false)]; tensor attn_weights_177_cast_fp16 = matmul(transpose_x = attn_weights_177_transpose_x_0, transpose_y = attn_weights_177_transpose_y_0, x = var_3449_cast_fp16_0, y = var_3462_cast_fp16_0)[name = string("attn_weights_177_cast_fp16")]; fp16 _inversed_attn_weights_179_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_179_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_179_cast_fp16 = mul(x = attn_weights_177_cast_fp16, y = _inversed_attn_weights_179_y_0_to_fp16)[name = string("_inversed_attn_weights_179_cast_fp16")]; tensor attn_weights_181_cast_fp16 = add(x = _inversed_attn_weights_179_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_181_cast_fp16")]; int32 var_3469 = const()[name = string("op_3469"), val = int32(2)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_3469, x = attn_weights_181_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool var_3475_transpose_x_1 = const()[name = string("op_3475_transpose_x_1"), val = bool(true)]; bool var_3475_transpose_y_1 = const()[name = string("op_3475_transpose_y_1"), val = bool(false)]; tensor var_3475_cast_fp16 = matmul(transpose_x = var_3475_transpose_x_1, transpose_y = var_3475_transpose_y_1, x = attn_weights_183_cast_fp16, y = var_3459_cast_fp16_0)[name = string("op_3475_cast_fp16")]; bool attn_weights_185_transpose_x_0 = const()[name = string("attn_weights_185_transpose_x_0"), val = bool(false)]; bool attn_weights_185_transpose_y_0 = const()[name = string("attn_weights_185_transpose_y_0"), val = bool(false)]; tensor attn_weights_185_cast_fp16 = matmul(transpose_x = attn_weights_185_transpose_x_0, transpose_y = attn_weights_185_transpose_y_0, x = var_3449_cast_fp16_1, y = var_3462_cast_fp16_1)[name = string("attn_weights_185_cast_fp16")]; fp16 _inversed_attn_weights_187_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_187_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_187_cast_fp16 = mul(x = attn_weights_185_cast_fp16, y = _inversed_attn_weights_187_y_0_to_fp16)[name = string("_inversed_attn_weights_187_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = _inversed_attn_weights_187_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_189_cast_fp16")]; int32 var_3481 = const()[name = string("op_3481"), val = int32(2)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_3481, x = attn_weights_189_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_67_transpose_x_1 = const()[name = string("attn_output_67_transpose_x_1"), val = bool(true)]; bool attn_output_67_transpose_y_1 = const()[name = string("attn_output_67_transpose_y_1"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_1, transpose_y = attn_output_67_transpose_y_1, x = attn_weights_191_cast_fp16, y = var_3459_cast_fp16_1)[name = string("attn_output_67_cast_fp16")]; int32 var_3489 = const()[name = string("op_3489"), val = int32(1)]; bool attn_output_69_interleave_0 = const()[name = string("attn_output_69_interleave_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = concat(axis = var_3489, interleave = attn_output_69_interleave_0, values = (var_3475_cast_fp16, attn_output_67_cast_fp16))[name = string("attn_output_69_cast_fp16")]; tensor var_3493_perm_0 = const()[name = string("op_3493_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, 1024, 1, 64])]; tensor var_3493_cast_fp16 = transpose(perm = var_3493_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_36")]; tensor x_203_cast_fp16 = reshape(shape = var_3498, x = var_3493_cast_fp16)[name = string("x_203_cast_fp16")]; string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339852032)))]; tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = var_3505_to_fp16, x = x_203_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor x_205_cast_fp16 = add(x = x_197_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("x_205_cast_fp16")]; int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; fp16 const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3520_cast_fp16 = mul(x = x_205_cast_fp16, y = const_123_promoted_to_fp16)[name = string("op_3520_cast_fp16")]; bool x_207_interleave_0 = const()[name = string("x_207_interleave_0"), val = bool(false)]; tensor x_207_cast_fp16 = concat(axis = var_3517, interleave = x_207_interleave_0, values = (x_205_cast_fp16, var_3520_cast_fp16))[name = string("x_207_cast_fp16")]; tensor out_139_axes_0 = const()[name = string("out_139_axes_0"), val = tensor([1])]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_3530_to_fp16, x = x_207_cast_fp16)[name = string("out_139_cast_fp16")]; tensor layer_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341949248)))]; tensor out_141_cast_fp16 = mul(x = out_139_cast_fp16, y = layer_layers_11_post_attention_layernorm_weight_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_3536_split_sizes_0 = const()[name = string("op_3536_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3536_axis_0 = const()[name = string("op_3536_axis_0"), val = int32(1)]; tensor var_3536_cast_fp16_0, tensor var_3536_cast_fp16_1 = split(axis = var_3536_axis_0, split_sizes = var_3536_split_sizes_0, x = out_141_cast_fp16)[name = string("op_3536_cast_fp16")]; string input_23_pad_type_0 = const()[name = string("input_23_pad_type_0"), val = string("valid")]; tensor input_23_strides_0 = const()[name = string("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = string("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = string("input_23_dilations_0"), val = tensor([1, 1])]; int32 input_23_groups_0 = const()[name = string("input_23_groups_0"), val = int32(1)]; tensor var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341953408)))]; tensor input_23_cast_fp16 = conv(dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = var_3541_to_fp16, x = var_3536_cast_fp16_0)[name = string("input_23_cast_fp16")]; tensor var_3552_cast_fp16 = silu(x = input_23_cast_fp16)[name = string("op_3552_cast_fp16")]; string var_3557_pad_type_0 = const()[name = string("op_3557_pad_type_0"), val = string("valid")]; tensor var_3557_strides_0 = const()[name = string("op_3557_strides_0"), val = tensor([1, 1])]; tensor var_3557_pad_0 = const()[name = string("op_3557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_dilations_0 = const()[name = string("op_3557_dilations_0"), val = tensor([1, 1])]; int32 var_3557_groups_0 = const()[name = string("op_3557_groups_0"), val = int32(1)]; tensor var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350342080)))]; tensor var_3557_cast_fp16 = conv(dilations = var_3557_dilations_0, groups = var_3557_groups_0, pad = var_3557_pad_0, pad_type = var_3557_pad_type_0, strides = var_3557_strides_0, weight = var_3540_to_fp16, x = var_3536_cast_fp16_0)[name = string("op_3557_cast_fp16")]; tensor x_213_cast_fp16 = mul(x = var_3552_cast_fp16, y = var_3557_cast_fp16)[name = string("x_213_cast_fp16")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor var_3539_to_fp16 = const()[name = string("op_3539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358730752)))]; tensor hidden_states_71_cast_fp16 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = var_3539_to_fp16, x = x_213_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor x_215_cast_fp16 = add(x = x_205_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("x_215_cast_fp16")]; int32 var_3570 = const()[name = string("op_3570"), val = int32(1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3573_cast_fp16 = mul(x = x_215_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_3573_cast_fp16")]; bool x_217_interleave_0 = const()[name = string("x_217_interleave_0"), val = bool(false)]; tensor x_217_cast_fp16 = concat(axis = var_3570, interleave = x_217_interleave_0, values = (x_215_cast_fp16, var_3573_cast_fp16))[name = string("x_217_cast_fp16")]; tensor out_145_axes_0 = const()[name = string("out_145_axes_0"), val = tensor([1])]; fp16 var_3583_to_fp16 = const()[name = string("op_3583_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_145_cast_fp16 = layer_norm(axes = out_145_axes_0, epsilon = var_3583_to_fp16, x = x_217_cast_fp16)[name = string("out_145_cast_fp16")]; tensor layer_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367119424)))]; tensor out_147_cast_fp16 = mul(x = out_145_cast_fp16, y = layer_layers_12_input_layernorm_weight_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_3589_split_sizes_0 = const()[name = string("op_3589_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3589_axis_0 = const()[name = string("op_3589_axis_0"), val = int32(1)]; tensor var_3589_cast_fp16_0, tensor var_3589_cast_fp16_1 = split(axis = var_3589_axis_0, split_sizes = var_3589_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3589_cast_fp16")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor var_3611_to_fp16 = const()[name = string("op_3611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367123584)))]; tensor query_states_49_cast_fp16 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = var_3611_to_fp16, x = var_3589_cast_fp16_0)[name = string("query_states_49_cast_fp16")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor var_3622_to_fp16 = const()[name = string("op_3622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369220800)))]; tensor key_states_49_cast_fp16 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = var_3622_to_fp16, x = var_3589_cast_fp16_0)[name = string("key_states_49_cast_fp16")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor var_3633_to_fp16 = const()[name = string("op_3633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369483008)))]; tensor value_states_49_cast_fp16 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = var_3633_to_fp16, x = var_3589_cast_fp16_0)[name = string("value_states_49_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, 16, 64, 64])]; tensor embed_49_cast_fp16 = reshape(shape = var_3641, x = query_states_49_cast_fp16)[name = string("embed_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 2, 64, 64])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = key_states_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor embed_51_perm_0 = const()[name = string("embed_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3652 = const()[name = string("op_3652"), val = tensor([1, 2, 64, 64])]; tensor var_3653_cast_fp16 = reshape(shape = var_3652, x = value_states_49_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor value_states_51_perm_0 = const()[name = string("value_states_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3657_cast_fp16 = mul(x = embed_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3657_cast_fp16")]; tensor var_3658_split_sizes_0 = const()[name = string("op_3658_split_sizes_0"), val = tensor([32, 32])]; int32 var_3658_axis_0 = const()[name = string("op_3658_axis_0"), val = int32(-2)]; tensor var_3658_cast_fp16_0, tensor var_3658_cast_fp16_1 = split(axis = var_3658_axis_0, split_sizes = var_3658_split_sizes_0, x = embed_49_cast_fp16)[name = string("op_3658_cast_fp16")]; fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3660_cast_fp16 = mul(x = var_3658_cast_fp16_1, y = const_127_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; int32 var_3662 = const()[name = string("op_3662"), val = int32(-2)]; bool var_3663_interleave_0 = const()[name = string("op_3663_interleave_0"), val = bool(false)]; tensor var_3663_cast_fp16 = concat(axis = var_3662, interleave = var_3663_interleave_0, values = (var_3660_cast_fp16, var_3658_cast_fp16_0))[name = string("op_3663_cast_fp16")]; tensor var_3664_cast_fp16 = mul(x = var_3663_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_3657_cast_fp16, y = var_3664_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor embed_51_cast_fp16 = transpose(perm = embed_51_perm_0, x = var_3646_cast_fp16)[name = string("transpose_35")]; tensor var_3667_cast_fp16 = mul(x = embed_51_cast_fp16, y = cos_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor var_3668_split_sizes_0 = const()[name = string("op_3668_split_sizes_0"), val = tensor([32, 32])]; int32 var_3668_axis_0 = const()[name = string("op_3668_axis_0"), val = int32(-1)]; tensor var_3668_cast_fp16_0, tensor var_3668_cast_fp16_1 = split(axis = var_3668_axis_0, split_sizes = var_3668_split_sizes_0, x = embed_51_cast_fp16)[name = string("op_3668_cast_fp16")]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3670_cast_fp16 = mul(x = var_3668_cast_fp16_1, y = const_128_promoted_to_fp16)[name = string("op_3670_cast_fp16")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; bool var_3673_interleave_0 = const()[name = string("op_3673_interleave_0"), val = bool(false)]; tensor var_3673_cast_fp16 = concat(axis = var_3672, interleave = var_3673_interleave_0, values = (var_3670_cast_fp16, var_3668_cast_fp16_0))[name = string("op_3673_cast_fp16")]; tensor var_3674_cast_fp16 = mul(x = var_3673_cast_fp16, y = sin_cast_fp16)[name = string("op_3674_cast_fp16")]; tensor key_states_51_cast_fp16 = add(x = var_3667_cast_fp16, y = var_3674_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([12])]; tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([13])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_121, expand_dims_122, position_id, concat_99_values3_0))[name = string("concat_99")]; tensor concat_100_values1_0 = const()[name = string("concat_100_values1_0"), val = tensor([0])]; tensor concat_100_values3_0 = const()[name = string("concat_100_values3_0"), val = tensor([0])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (expand_dims_124, concat_100_values1_0, var_426, concat_100_values3_0))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = key_states_51_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_382_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_382")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51_cast_fp16 = transpose(perm = value_states_51_perm_0, x = var_3653_cast_fp16)[name = string("transpose_34")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = value_states_51_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_383_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_383")]; tensor var_3717_begin_0 = const()[name = string("op_3717_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3717_end_0 = const()[name = string("op_3717_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3717_end_mask_0 = const()[name = string("op_3717_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = coreml_update_state_72)[name = string("op_3717_cast_fp16")]; tensor tile_24 = const()[name = string("tile_24"), val = tensor([1, 1])]; int32 var_3720_axis_0 = const()[name = string("op_3720_axis_0"), val = int32(1)]; tensor var_3720_cast_fp16_0, tensor var_3720_cast_fp16_1 = split(axis = var_3720_axis_0, split_sizes = tile_24, x = var_3717_cast_fp16)[name = string("op_3720_cast_fp16")]; tensor var_3727_begin_0 = const()[name = string("op_3727_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3727_end_0 = const()[name = string("op_3727_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3727_end_mask_0 = const()[name = string("op_3727_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = coreml_update_state_73)[name = string("op_3727_cast_fp16")]; tensor tile_25 = const()[name = string("tile_25"), val = tensor([1, 1])]; int32 var_3730_axis_0 = const()[name = string("op_3730_axis_0"), val = int32(1)]; tensor var_3730_cast_fp16_0, tensor var_3730_cast_fp16_1 = split(axis = var_3730_axis_0, split_sizes = tile_25, x = var_3727_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor var_3733_split_sizes_0 = const()[name = string("op_3733_split_sizes_0"), val = tensor([8, 8])]; int32 var_3733_axis_0 = const()[name = string("op_3733_axis_0"), val = int32(1)]; tensor var_3733_cast_fp16_0, tensor var_3733_cast_fp16_1 = split(axis = var_3733_axis_0, split_sizes = var_3733_split_sizes_0, x = query_states_51_cast_fp16)[name = string("op_3733_cast_fp16")]; bool attn_weights_193_transpose_x_0 = const()[name = string("attn_weights_193_transpose_x_0"), val = bool(false)]; bool attn_weights_193_transpose_y_0 = const()[name = string("attn_weights_193_transpose_y_0"), val = bool(false)]; tensor attn_weights_193_cast_fp16 = matmul(transpose_x = attn_weights_193_transpose_x_0, transpose_y = attn_weights_193_transpose_y_0, x = var_3720_cast_fp16_0, y = var_3733_cast_fp16_0)[name = string("attn_weights_193_cast_fp16")]; fp16 _inversed_attn_weights_195_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_195_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_195_cast_fp16 = mul(x = attn_weights_193_cast_fp16, y = _inversed_attn_weights_195_y_0_to_fp16)[name = string("_inversed_attn_weights_195_cast_fp16")]; tensor attn_weights_197_cast_fp16 = add(x = _inversed_attn_weights_195_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_197_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(2)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_3740, x = attn_weights_197_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool var_3746_transpose_x_1 = const()[name = string("op_3746_transpose_x_1"), val = bool(true)]; bool var_3746_transpose_y_1 = const()[name = string("op_3746_transpose_y_1"), val = bool(false)]; tensor var_3746_cast_fp16 = matmul(transpose_x = var_3746_transpose_x_1, transpose_y = var_3746_transpose_y_1, x = attn_weights_199_cast_fp16, y = var_3730_cast_fp16_0)[name = string("op_3746_cast_fp16")]; bool attn_weights_201_transpose_x_0 = const()[name = string("attn_weights_201_transpose_x_0"), val = bool(false)]; bool attn_weights_201_transpose_y_0 = const()[name = string("attn_weights_201_transpose_y_0"), val = bool(false)]; tensor attn_weights_201_cast_fp16 = matmul(transpose_x = attn_weights_201_transpose_x_0, transpose_y = attn_weights_201_transpose_y_0, x = var_3720_cast_fp16_1, y = var_3733_cast_fp16_1)[name = string("attn_weights_201_cast_fp16")]; fp16 _inversed_attn_weights_203_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_203_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_203_cast_fp16 = mul(x = attn_weights_201_cast_fp16, y = _inversed_attn_weights_203_y_0_to_fp16)[name = string("_inversed_attn_weights_203_cast_fp16")]; tensor attn_weights_205_cast_fp16 = add(x = _inversed_attn_weights_203_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_205_cast_fp16")]; int32 var_3752 = const()[name = string("op_3752"), val = int32(2)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_3752, x = attn_weights_205_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(true)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_207_cast_fp16, y = var_3730_cast_fp16_1)[name = string("attn_output_73_cast_fp16")]; int32 var_3760 = const()[name = string("op_3760"), val = int32(1)]; bool attn_output_75_interleave_0 = const()[name = string("attn_output_75_interleave_0"), val = bool(false)]; tensor attn_output_75_cast_fp16 = concat(axis = var_3760, interleave = attn_output_75_interleave_0, values = (var_3746_cast_fp16, attn_output_73_cast_fp16))[name = string("attn_output_75_cast_fp16")]; tensor var_3764_perm_0 = const()[name = string("op_3764_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([1, 1024, 1, 64])]; tensor var_3764_cast_fp16 = transpose(perm = var_3764_perm_0, x = attn_output_75_cast_fp16)[name = string("transpose_33")]; tensor x_221_cast_fp16 = reshape(shape = var_3769, x = var_3764_cast_fp16)[name = string("x_221_cast_fp16")]; string hidden_states_75_pad_type_0 = const()[name = string("hidden_states_75_pad_type_0"), val = string("valid")]; tensor hidden_states_75_strides_0 = const()[name = string("hidden_states_75_strides_0"), val = tensor([1, 1])]; tensor hidden_states_75_pad_0 = const()[name = string("hidden_states_75_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_75_dilations_0 = const()[name = string("hidden_states_75_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_75_groups_0 = const()[name = string("hidden_states_75_groups_0"), val = int32(1)]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369745216)))]; tensor hidden_states_75_cast_fp16 = conv(dilations = hidden_states_75_dilations_0, groups = hidden_states_75_groups_0, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = hidden_states_75_strides_0, weight = var_3776_to_fp16, x = x_221_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor x_223_cast_fp16 = add(x = x_215_cast_fp16, y = hidden_states_75_cast_fp16)[name = string("x_223_cast_fp16")]; int32 var_3788 = const()[name = string("op_3788"), val = int32(1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3791_cast_fp16 = mul(x = x_223_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3791_cast_fp16")]; bool x_225_interleave_0 = const()[name = string("x_225_interleave_0"), val = bool(false)]; tensor x_225_cast_fp16 = concat(axis = var_3788, interleave = x_225_interleave_0, values = (x_223_cast_fp16, var_3791_cast_fp16))[name = string("x_225_cast_fp16")]; tensor out_151_axes_0 = const()[name = string("out_151_axes_0"), val = tensor([1])]; fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_151_cast_fp16 = layer_norm(axes = out_151_axes_0, epsilon = var_3801_to_fp16, x = x_225_cast_fp16)[name = string("out_151_cast_fp16")]; tensor layer_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371842432)))]; tensor out_153_cast_fp16 = mul(x = out_151_cast_fp16, y = layer_layers_12_post_attention_layernorm_weight_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3807_split_sizes_0 = const()[name = string("op_3807_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3807_axis_0 = const()[name = string("op_3807_axis_0"), val = int32(1)]; tensor var_3807_cast_fp16_0, tensor var_3807_cast_fp16_1 = split(axis = var_3807_axis_0, split_sizes = var_3807_split_sizes_0, x = out_153_cast_fp16)[name = string("op_3807_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371846592)))]; tensor input_25_cast_fp16 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = var_3812_to_fp16, x = var_3807_cast_fp16_0)[name = string("input_25_cast_fp16")]; tensor var_3823_cast_fp16 = silu(x = input_25_cast_fp16)[name = string("op_3823_cast_fp16")]; string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")]; tensor var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor([1, 1])]; tensor var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor([1, 1])]; int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)]; tensor var_3811_to_fp16 = const()[name = string("op_3811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380235264)))]; tensor var_3828_cast_fp16 = conv(dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = var_3811_to_fp16, x = var_3807_cast_fp16_0)[name = string("op_3828_cast_fp16")]; tensor x_231_cast_fp16 = mul(x = var_3823_cast_fp16, y = var_3828_cast_fp16)[name = string("x_231_cast_fp16")]; string hidden_states_77_pad_type_0 = const()[name = string("hidden_states_77_pad_type_0"), val = string("valid")]; tensor hidden_states_77_strides_0 = const()[name = string("hidden_states_77_strides_0"), val = tensor([1, 1])]; tensor hidden_states_77_pad_0 = const()[name = string("hidden_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_77_dilations_0 = const()[name = string("hidden_states_77_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_77_groups_0 = const()[name = string("hidden_states_77_groups_0"), val = int32(1)]; tensor var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388623936)))]; tensor hidden_states_77_cast_fp16 = conv(dilations = hidden_states_77_dilations_0, groups = hidden_states_77_groups_0, pad = hidden_states_77_pad_0, pad_type = hidden_states_77_pad_type_0, strides = hidden_states_77_strides_0, weight = var_3810_to_fp16, x = x_231_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_223_cast_fp16, y = hidden_states_77_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_3841 = const()[name = string("op_3841"), val = int32(1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x_233_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool x_235_interleave_0 = const()[name = string("x_235_interleave_0"), val = bool(false)]; tensor x_235_cast_fp16 = concat(axis = var_3841, interleave = x_235_interleave_0, values = (x_233_cast_fp16, var_3844_cast_fp16))[name = string("x_235_cast_fp16")]; tensor out_157_axes_0 = const()[name = string("out_157_axes_0"), val = tensor([1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_157_cast_fp16 = layer_norm(axes = out_157_axes_0, epsilon = var_3854_to_fp16, x = x_235_cast_fp16)[name = string("out_157_cast_fp16")]; tensor layer_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397012608)))]; tensor out_159_cast_fp16 = mul(x = out_157_cast_fp16, y = layer_layers_13_input_layernorm_weight_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_3860_split_sizes_0 = const()[name = string("op_3860_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3860_axis_0 = const()[name = string("op_3860_axis_0"), val = int32(1)]; tensor var_3860_cast_fp16_0, tensor var_3860_cast_fp16_1 = split(axis = var_3860_axis_0, split_sizes = var_3860_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3860_cast_fp16")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397016768)))]; tensor query_states_53_cast_fp16 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = var_3882_to_fp16, x = var_3860_cast_fp16_0)[name = string("query_states_53_cast_fp16")]; string key_states_53_pad_type_0 = const()[name = string("key_states_53_pad_type_0"), val = string("valid")]; tensor key_states_53_strides_0 = const()[name = string("key_states_53_strides_0"), val = tensor([1, 1])]; tensor key_states_53_pad_0 = const()[name = string("key_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_53_dilations_0 = const()[name = string("key_states_53_dilations_0"), val = tensor([1, 1])]; int32 key_states_53_groups_0 = const()[name = string("key_states_53_groups_0"), val = int32(1)]; tensor var_3893_to_fp16 = const()[name = string("op_3893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399113984)))]; tensor key_states_53_cast_fp16 = conv(dilations = key_states_53_dilations_0, groups = key_states_53_groups_0, pad = key_states_53_pad_0, pad_type = key_states_53_pad_type_0, strides = key_states_53_strides_0, weight = var_3893_to_fp16, x = var_3860_cast_fp16_0)[name = string("key_states_53_cast_fp16")]; string value_states_53_pad_type_0 = const()[name = string("value_states_53_pad_type_0"), val = string("valid")]; tensor value_states_53_strides_0 = const()[name = string("value_states_53_strides_0"), val = tensor([1, 1])]; tensor value_states_53_pad_0 = const()[name = string("value_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_53_dilations_0 = const()[name = string("value_states_53_dilations_0"), val = tensor([1, 1])]; int32 value_states_53_groups_0 = const()[name = string("value_states_53_groups_0"), val = int32(1)]; tensor var_3904_to_fp16 = const()[name = string("op_3904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399376192)))]; tensor value_states_53_cast_fp16 = conv(dilations = value_states_53_dilations_0, groups = value_states_53_groups_0, pad = value_states_53_pad_0, pad_type = value_states_53_pad_type_0, strides = value_states_53_strides_0, weight = var_3904_to_fp16, x = var_3860_cast_fp16_0)[name = string("value_states_53_cast_fp16")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([1, 16, 64, 64])]; tensor embed_53_cast_fp16 = reshape(shape = var_3912, x = query_states_53_cast_fp16)[name = string("embed_53_cast_fp16")]; tensor var_3916 = const()[name = string("op_3916"), val = tensor([1, 2, 64, 64])]; tensor var_3917_cast_fp16 = reshape(shape = var_3916, x = key_states_53_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor embed_55_perm_0 = const()[name = string("embed_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2, 64, 64])]; tensor var_3924_cast_fp16 = reshape(shape = var_3923, x = value_states_53_cast_fp16)[name = string("op_3924_cast_fp16")]; tensor value_states_55_perm_0 = const()[name = string("value_states_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3928_cast_fp16 = mul(x = embed_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor var_3929_split_sizes_0 = const()[name = string("op_3929_split_sizes_0"), val = tensor([32, 32])]; int32 var_3929_axis_0 = const()[name = string("op_3929_axis_0"), val = int32(-2)]; tensor var_3929_cast_fp16_0, tensor var_3929_cast_fp16_1 = split(axis = var_3929_axis_0, split_sizes = var_3929_split_sizes_0, x = embed_53_cast_fp16)[name = string("op_3929_cast_fp16")]; fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3931_cast_fp16 = mul(x = var_3929_cast_fp16_1, y = const_137_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; int32 var_3933 = const()[name = string("op_3933"), val = int32(-2)]; bool var_3934_interleave_0 = const()[name = string("op_3934_interleave_0"), val = bool(false)]; tensor var_3934_cast_fp16 = concat(axis = var_3933, interleave = var_3934_interleave_0, values = (var_3931_cast_fp16, var_3929_cast_fp16_0))[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = var_3934_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_3928_cast_fp16, y = var_3935_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor embed_55_cast_fp16 = transpose(perm = embed_55_perm_0, x = var_3917_cast_fp16)[name = string("transpose_32")]; tensor var_3938_cast_fp16 = mul(x = embed_55_cast_fp16, y = cos_cast_fp16)[name = string("op_3938_cast_fp16")]; tensor var_3939_split_sizes_0 = const()[name = string("op_3939_split_sizes_0"), val = tensor([32, 32])]; int32 var_3939_axis_0 = const()[name = string("op_3939_axis_0"), val = int32(-1)]; tensor var_3939_cast_fp16_0, tensor var_3939_cast_fp16_1 = split(axis = var_3939_axis_0, split_sizes = var_3939_split_sizes_0, x = embed_55_cast_fp16)[name = string("op_3939_cast_fp16")]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3941_cast_fp16 = mul(x = var_3939_cast_fp16_1, y = const_138_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; bool var_3944_interleave_0 = const()[name = string("op_3944_interleave_0"), val = bool(false)]; tensor var_3944_cast_fp16 = concat(axis = var_3943, interleave = var_3944_interleave_0, values = (var_3941_cast_fp16, var_3939_cast_fp16_0))[name = string("op_3944_cast_fp16")]; tensor var_3945_cast_fp16 = mul(x = var_3944_cast_fp16, y = sin_cast_fp16)[name = string("op_3945_cast_fp16")]; tensor key_states_55_cast_fp16 = add(x = var_3938_cast_fp16, y = var_3945_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor expand_dims_131 = const()[name = string("expand_dims_131"), val = tensor([13])]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_134 = const()[name = string("expand_dims_134"), val = tensor([14])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_131, expand_dims_132, position_id, concat_107_values3_0))[name = string("concat_107")]; tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (expand_dims_134, concat_108_values1_0, var_426, concat_108_values3_0))[name = string("concat_108")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = key_states_55_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_384_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_384")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_55_cast_fp16 = transpose(perm = value_states_55_perm_0, x = var_3924_cast_fp16)[name = string("transpose_31")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = value_states_55_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_385_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_385")]; tensor var_3988_begin_0 = const()[name = string("op_3988_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3988_end_0 = const()[name = string("op_3988_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3988_end_mask_0 = const()[name = string("op_3988_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = coreml_update_state_74)[name = string("op_3988_cast_fp16")]; tensor tile_26 = const()[name = string("tile_26"), val = tensor([1, 1])]; int32 var_3991_axis_0 = const()[name = string("op_3991_axis_0"), val = int32(1)]; tensor var_3991_cast_fp16_0, tensor var_3991_cast_fp16_1 = split(axis = var_3991_axis_0, split_sizes = tile_26, x = var_3988_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor var_3998_begin_0 = const()[name = string("op_3998_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3998_end_0 = const()[name = string("op_3998_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3998_end_mask_0 = const()[name = string("op_3998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3998_cast_fp16 = slice_by_index(begin = var_3998_begin_0, end = var_3998_end_0, end_mask = var_3998_end_mask_0, x = coreml_update_state_75)[name = string("op_3998_cast_fp16")]; tensor tile_27 = const()[name = string("tile_27"), val = tensor([1, 1])]; int32 var_4001_axis_0 = const()[name = string("op_4001_axis_0"), val = int32(1)]; tensor var_4001_cast_fp16_0, tensor var_4001_cast_fp16_1 = split(axis = var_4001_axis_0, split_sizes = tile_27, x = var_3998_cast_fp16)[name = string("op_4001_cast_fp16")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([8, 8])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(1)]; tensor var_4004_cast_fp16_0, tensor var_4004_cast_fp16_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = query_states_55_cast_fp16)[name = string("op_4004_cast_fp16")]; bool attn_weights_209_transpose_x_0 = const()[name = string("attn_weights_209_transpose_x_0"), val = bool(false)]; bool attn_weights_209_transpose_y_0 = const()[name = string("attn_weights_209_transpose_y_0"), val = bool(false)]; tensor attn_weights_209_cast_fp16 = matmul(transpose_x = attn_weights_209_transpose_x_0, transpose_y = attn_weights_209_transpose_y_0, x = var_3991_cast_fp16_0, y = var_4004_cast_fp16_0)[name = string("attn_weights_209_cast_fp16")]; fp16 _inversed_attn_weights_211_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_211_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_211_cast_fp16 = mul(x = attn_weights_209_cast_fp16, y = _inversed_attn_weights_211_y_0_to_fp16)[name = string("_inversed_attn_weights_211_cast_fp16")]; tensor attn_weights_213_cast_fp16 = add(x = _inversed_attn_weights_211_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_213_cast_fp16")]; int32 var_4011 = const()[name = string("op_4011"), val = int32(2)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_4011, x = attn_weights_213_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool var_4017_transpose_x_1 = const()[name = string("op_4017_transpose_x_1"), val = bool(true)]; bool var_4017_transpose_y_1 = const()[name = string("op_4017_transpose_y_1"), val = bool(false)]; tensor var_4017_cast_fp16 = matmul(transpose_x = var_4017_transpose_x_1, transpose_y = var_4017_transpose_y_1, x = attn_weights_215_cast_fp16, y = var_4001_cast_fp16_0)[name = string("op_4017_cast_fp16")]; bool attn_weights_217_transpose_x_0 = const()[name = string("attn_weights_217_transpose_x_0"), val = bool(false)]; bool attn_weights_217_transpose_y_0 = const()[name = string("attn_weights_217_transpose_y_0"), val = bool(false)]; tensor attn_weights_217_cast_fp16 = matmul(transpose_x = attn_weights_217_transpose_x_0, transpose_y = attn_weights_217_transpose_y_0, x = var_3991_cast_fp16_1, y = var_4004_cast_fp16_1)[name = string("attn_weights_217_cast_fp16")]; fp16 _inversed_attn_weights_219_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_219_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_219_cast_fp16 = mul(x = attn_weights_217_cast_fp16, y = _inversed_attn_weights_219_y_0_to_fp16)[name = string("_inversed_attn_weights_219_cast_fp16")]; tensor attn_weights_221_cast_fp16 = add(x = _inversed_attn_weights_219_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_221_cast_fp16")]; int32 var_4023 = const()[name = string("op_4023"), val = int32(2)]; tensor attn_weights_223_cast_fp16 = softmax(axis = var_4023, x = attn_weights_221_cast_fp16)[name = string("attn_weights_223_cast_fp16")]; bool attn_output_79_transpose_x_1 = const()[name = string("attn_output_79_transpose_x_1"), val = bool(true)]; bool attn_output_79_transpose_y_1 = const()[name = string("attn_output_79_transpose_y_1"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_1, transpose_y = attn_output_79_transpose_y_1, x = attn_weights_223_cast_fp16, y = var_4001_cast_fp16_1)[name = string("attn_output_79_cast_fp16")]; int32 var_4031 = const()[name = string("op_4031"), val = int32(1)]; bool attn_output_81_interleave_0 = const()[name = string("attn_output_81_interleave_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = concat(axis = var_4031, interleave = attn_output_81_interleave_0, values = (var_4017_cast_fp16, attn_output_79_cast_fp16))[name = string("attn_output_81_cast_fp16")]; tensor var_4035_perm_0 = const()[name = string("op_4035_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 1024, 1, 64])]; tensor var_4035_cast_fp16 = transpose(perm = var_4035_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_30")]; tensor x_239_cast_fp16 = reshape(shape = var_4040, x = var_4035_cast_fp16)[name = string("x_239_cast_fp16")]; string hidden_states_81_pad_type_0 = const()[name = string("hidden_states_81_pad_type_0"), val = string("valid")]; tensor hidden_states_81_strides_0 = const()[name = string("hidden_states_81_strides_0"), val = tensor([1, 1])]; tensor hidden_states_81_pad_0 = const()[name = string("hidden_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_81_dilations_0 = const()[name = string("hidden_states_81_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_81_groups_0 = const()[name = string("hidden_states_81_groups_0"), val = int32(1)]; tensor var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399638400)))]; tensor hidden_states_81_cast_fp16 = conv(dilations = hidden_states_81_dilations_0, groups = hidden_states_81_groups_0, pad = hidden_states_81_pad_0, pad_type = hidden_states_81_pad_type_0, strides = hidden_states_81_strides_0, weight = var_4047_to_fp16, x = x_239_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_81_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(1)]; fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4062_cast_fp16 = mul(x = x_241_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; bool x_243_interleave_0 = const()[name = string("x_243_interleave_0"), val = bool(false)]; tensor x_243_cast_fp16 = concat(axis = var_4059, interleave = x_243_interleave_0, values = (x_241_cast_fp16, var_4062_cast_fp16))[name = string("x_243_cast_fp16")]; tensor out_163_axes_0 = const()[name = string("out_163_axes_0"), val = tensor([1])]; fp16 var_4072_to_fp16 = const()[name = string("op_4072_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_163_cast_fp16 = layer_norm(axes = out_163_axes_0, epsilon = var_4072_to_fp16, x = x_243_cast_fp16)[name = string("out_163_cast_fp16")]; tensor layer_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401735616)))]; tensor out_165_cast_fp16 = mul(x = out_163_cast_fp16, y = layer_layers_13_post_attention_layernorm_weight_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_4078_split_sizes_0 = const()[name = string("op_4078_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4078_axis_0 = const()[name = string("op_4078_axis_0"), val = int32(1)]; tensor var_4078_cast_fp16_0, tensor var_4078_cast_fp16_1 = split(axis = var_4078_axis_0, split_sizes = var_4078_split_sizes_0, x = out_165_cast_fp16)[name = string("op_4078_cast_fp16")]; string input_27_pad_type_0 = const()[name = string("input_27_pad_type_0"), val = string("valid")]; tensor input_27_strides_0 = const()[name = string("input_27_strides_0"), val = tensor([1, 1])]; tensor input_27_pad_0 = const()[name = string("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_27_dilations_0 = const()[name = string("input_27_dilations_0"), val = tensor([1, 1])]; int32 input_27_groups_0 = const()[name = string("input_27_groups_0"), val = int32(1)]; tensor var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401739776)))]; tensor input_27_cast_fp16 = conv(dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = var_4083_to_fp16, x = var_4078_cast_fp16_0)[name = string("input_27_cast_fp16")]; tensor var_4094_cast_fp16 = silu(x = input_27_cast_fp16)[name = string("op_4094_cast_fp16")]; string var_4099_pad_type_0 = const()[name = string("op_4099_pad_type_0"), val = string("valid")]; tensor var_4099_strides_0 = const()[name = string("op_4099_strides_0"), val = tensor([1, 1])]; tensor var_4099_pad_0 = const()[name = string("op_4099_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4099_dilations_0 = const()[name = string("op_4099_dilations_0"), val = tensor([1, 1])]; int32 var_4099_groups_0 = const()[name = string("op_4099_groups_0"), val = int32(1)]; tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410128448)))]; tensor var_4099_cast_fp16 = conv(dilations = var_4099_dilations_0, groups = var_4099_groups_0, pad = var_4099_pad_0, pad_type = var_4099_pad_type_0, strides = var_4099_strides_0, weight = var_4082_to_fp16, x = var_4078_cast_fp16_0)[name = string("op_4099_cast_fp16")]; tensor x_249_cast_fp16 = mul(x = var_4094_cast_fp16, y = var_4099_cast_fp16)[name = string("x_249_cast_fp16")]; string hidden_states_83_pad_type_0 = const()[name = string("hidden_states_83_pad_type_0"), val = string("valid")]; tensor hidden_states_83_strides_0 = const()[name = string("hidden_states_83_strides_0"), val = tensor([1, 1])]; tensor hidden_states_83_pad_0 = const()[name = string("hidden_states_83_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_83_dilations_0 = const()[name = string("hidden_states_83_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_83_groups_0 = const()[name = string("hidden_states_83_groups_0"), val = int32(1)]; tensor var_4081_to_fp16 = const()[name = string("op_4081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418517120)))]; tensor hidden_states_83_cast_fp16 = conv(dilations = hidden_states_83_dilations_0, groups = hidden_states_83_groups_0, pad = hidden_states_83_pad_0, pad_type = hidden_states_83_pad_type_0, strides = hidden_states_83_strides_0, weight = var_4081_to_fp16, x = x_249_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_4112 = const()[name = string("op_4112"), val = int32(1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4115_cast_fp16 = mul(x = x_251_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4115_cast_fp16")]; bool x_253_interleave_0 = const()[name = string("x_253_interleave_0"), val = bool(false)]; tensor x_253_cast_fp16 = concat(axis = var_4112, interleave = x_253_interleave_0, values = (x_251_cast_fp16, var_4115_cast_fp16))[name = string("x_253_cast_fp16")]; tensor out_169_axes_0 = const()[name = string("out_169_axes_0"), val = tensor([1])]; fp16 var_4125_to_fp16 = const()[name = string("op_4125_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_169_cast_fp16 = layer_norm(axes = out_169_axes_0, epsilon = var_4125_to_fp16, x = x_253_cast_fp16)[name = string("out_169_cast_fp16")]; tensor layer_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426905792)))]; tensor out_171_cast_fp16 = mul(x = out_169_cast_fp16, y = layer_layers_14_input_layernorm_weight_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_4131_split_sizes_0 = const()[name = string("op_4131_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4131_axis_0 = const()[name = string("op_4131_axis_0"), val = int32(1)]; tensor var_4131_cast_fp16_0, tensor var_4131_cast_fp16_1 = split(axis = var_4131_axis_0, split_sizes = var_4131_split_sizes_0, x = out_171_cast_fp16)[name = string("op_4131_cast_fp16")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426909952)))]; tensor query_states_57_cast_fp16 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = var_4153_to_fp16, x = var_4131_cast_fp16_0)[name = string("query_states_57_cast_fp16")]; string key_states_57_pad_type_0 = const()[name = string("key_states_57_pad_type_0"), val = string("valid")]; tensor key_states_57_strides_0 = const()[name = string("key_states_57_strides_0"), val = tensor([1, 1])]; tensor key_states_57_pad_0 = const()[name = string("key_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_57_dilations_0 = const()[name = string("key_states_57_dilations_0"), val = tensor([1, 1])]; int32 key_states_57_groups_0 = const()[name = string("key_states_57_groups_0"), val = int32(1)]; tensor var_4164_to_fp16 = const()[name = string("op_4164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429007168)))]; tensor key_states_57_cast_fp16 = conv(dilations = key_states_57_dilations_0, groups = key_states_57_groups_0, pad = key_states_57_pad_0, pad_type = key_states_57_pad_type_0, strides = key_states_57_strides_0, weight = var_4164_to_fp16, x = var_4131_cast_fp16_0)[name = string("key_states_57_cast_fp16")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor var_4175_to_fp16 = const()[name = string("op_4175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429269376)))]; tensor value_states_57_cast_fp16 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = var_4175_to_fp16, x = var_4131_cast_fp16_0)[name = string("value_states_57_cast_fp16")]; tensor var_4183 = const()[name = string("op_4183"), val = tensor([1, 16, 64, 64])]; tensor embed_57_cast_fp16 = reshape(shape = var_4183, x = query_states_57_cast_fp16)[name = string("embed_57_cast_fp16")]; tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 2, 64, 64])]; tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = key_states_57_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor embed_59_perm_0 = const()[name = string("embed_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4194 = const()[name = string("op_4194"), val = tensor([1, 2, 64, 64])]; tensor var_4195_cast_fp16 = reshape(shape = var_4194, x = value_states_57_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor value_states_59_perm_0 = const()[name = string("value_states_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4199_cast_fp16 = mul(x = embed_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor var_4200_split_sizes_0 = const()[name = string("op_4200_split_sizes_0"), val = tensor([32, 32])]; int32 var_4200_axis_0 = const()[name = string("op_4200_axis_0"), val = int32(-2)]; tensor var_4200_cast_fp16_0, tensor var_4200_cast_fp16_1 = split(axis = var_4200_axis_0, split_sizes = var_4200_split_sizes_0, x = embed_57_cast_fp16)[name = string("op_4200_cast_fp16")]; fp16 const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4202_cast_fp16 = mul(x = var_4200_cast_fp16_1, y = const_147_promoted_to_fp16)[name = string("op_4202_cast_fp16")]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-2)]; bool var_4205_interleave_0 = const()[name = string("op_4205_interleave_0"), val = bool(false)]; tensor var_4205_cast_fp16 = concat(axis = var_4204, interleave = var_4205_interleave_0, values = (var_4202_cast_fp16, var_4200_cast_fp16_0))[name = string("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = mul(x = var_4205_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4206_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4206_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor embed_59_cast_fp16 = transpose(perm = embed_59_perm_0, x = var_4188_cast_fp16)[name = string("transpose_29")]; tensor var_4209_cast_fp16 = mul(x = embed_59_cast_fp16, y = cos_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor var_4210_split_sizes_0 = const()[name = string("op_4210_split_sizes_0"), val = tensor([32, 32])]; int32 var_4210_axis_0 = const()[name = string("op_4210_axis_0"), val = int32(-1)]; tensor var_4210_cast_fp16_0, tensor var_4210_cast_fp16_1 = split(axis = var_4210_axis_0, split_sizes = var_4210_split_sizes_0, x = embed_59_cast_fp16)[name = string("op_4210_cast_fp16")]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4212_cast_fp16 = mul(x = var_4210_cast_fp16_1, y = const_148_promoted_to_fp16)[name = string("op_4212_cast_fp16")]; int32 var_4214 = const()[name = string("op_4214"), val = int32(-1)]; bool var_4215_interleave_0 = const()[name = string("op_4215_interleave_0"), val = bool(false)]; tensor var_4215_cast_fp16 = concat(axis = var_4214, interleave = var_4215_interleave_0, values = (var_4212_cast_fp16, var_4210_cast_fp16_0))[name = string("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = mul(x = var_4215_cast_fp16, y = sin_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor key_states_59_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4216_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([14])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([0])]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([15])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_141, expand_dims_142, position_id, concat_115_values3_0))[name = string("concat_115")]; tensor concat_116_values1_0 = const()[name = string("concat_116_values1_0"), val = tensor([0])]; tensor concat_116_values3_0 = const()[name = string("concat_116_values3_0"), val = tensor([0])]; int32 concat_116_axis_0 = const()[name = string("concat_116_axis_0"), val = int32(0)]; bool concat_116_interleave_0 = const()[name = string("concat_116_interleave_0"), val = bool(false)]; tensor concat_116 = concat(axis = concat_116_axis_0, interleave = concat_116_interleave_0, values = (expand_dims_144, concat_116_values1_0, var_426, concat_116_values3_0))[name = string("concat_116")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = key_states_59_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_386_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_386")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59_cast_fp16 = transpose(perm = value_states_59_perm_0, x = var_4195_cast_fp16)[name = string("transpose_28")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = value_states_59_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_387_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_387")]; tensor var_4259_begin_0 = const()[name = string("op_4259_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4259_end_0 = const()[name = string("op_4259_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4259_end_mask_0 = const()[name = string("op_4259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4259_cast_fp16 = slice_by_index(begin = var_4259_begin_0, end = var_4259_end_0, end_mask = var_4259_end_mask_0, x = coreml_update_state_76)[name = string("op_4259_cast_fp16")]; tensor tile_28 = const()[name = string("tile_28"), val = tensor([1, 1])]; int32 var_4262_axis_0 = const()[name = string("op_4262_axis_0"), val = int32(1)]; tensor var_4262_cast_fp16_0, tensor var_4262_cast_fp16_1 = split(axis = var_4262_axis_0, split_sizes = tile_28, x = var_4259_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor var_4269_begin_0 = const()[name = string("op_4269_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = string("op_4269_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4269_end_mask_0 = const()[name = string("op_4269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = coreml_update_state_77)[name = string("op_4269_cast_fp16")]; tensor tile_29 = const()[name = string("tile_29"), val = tensor([1, 1])]; int32 var_4272_axis_0 = const()[name = string("op_4272_axis_0"), val = int32(1)]; tensor var_4272_cast_fp16_0, tensor var_4272_cast_fp16_1 = split(axis = var_4272_axis_0, split_sizes = tile_29, x = var_4269_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4275_split_sizes_0 = const()[name = string("op_4275_split_sizes_0"), val = tensor([8, 8])]; int32 var_4275_axis_0 = const()[name = string("op_4275_axis_0"), val = int32(1)]; tensor var_4275_cast_fp16_0, tensor var_4275_cast_fp16_1 = split(axis = var_4275_axis_0, split_sizes = var_4275_split_sizes_0, x = query_states_59_cast_fp16)[name = string("op_4275_cast_fp16")]; bool attn_weights_225_transpose_x_0 = const()[name = string("attn_weights_225_transpose_x_0"), val = bool(false)]; bool attn_weights_225_transpose_y_0 = const()[name = string("attn_weights_225_transpose_y_0"), val = bool(false)]; tensor attn_weights_225_cast_fp16 = matmul(transpose_x = attn_weights_225_transpose_x_0, transpose_y = attn_weights_225_transpose_y_0, x = var_4262_cast_fp16_0, y = var_4275_cast_fp16_0)[name = string("attn_weights_225_cast_fp16")]; fp16 _inversed_attn_weights_227_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_227_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_227_cast_fp16 = mul(x = attn_weights_225_cast_fp16, y = _inversed_attn_weights_227_y_0_to_fp16)[name = string("_inversed_attn_weights_227_cast_fp16")]; tensor attn_weights_229_cast_fp16 = add(x = _inversed_attn_weights_227_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_229_cast_fp16")]; int32 var_4282 = const()[name = string("op_4282"), val = int32(2)]; tensor attn_weights_231_cast_fp16 = softmax(axis = var_4282, x = attn_weights_229_cast_fp16)[name = string("attn_weights_231_cast_fp16")]; bool var_4288_transpose_x_1 = const()[name = string("op_4288_transpose_x_1"), val = bool(true)]; bool var_4288_transpose_y_1 = const()[name = string("op_4288_transpose_y_1"), val = bool(false)]; tensor var_4288_cast_fp16 = matmul(transpose_x = var_4288_transpose_x_1, transpose_y = var_4288_transpose_y_1, x = attn_weights_231_cast_fp16, y = var_4272_cast_fp16_0)[name = string("op_4288_cast_fp16")]; bool attn_weights_233_transpose_x_0 = const()[name = string("attn_weights_233_transpose_x_0"), val = bool(false)]; bool attn_weights_233_transpose_y_0 = const()[name = string("attn_weights_233_transpose_y_0"), val = bool(false)]; tensor attn_weights_233_cast_fp16 = matmul(transpose_x = attn_weights_233_transpose_x_0, transpose_y = attn_weights_233_transpose_y_0, x = var_4262_cast_fp16_1, y = var_4275_cast_fp16_1)[name = string("attn_weights_233_cast_fp16")]; fp16 _inversed_attn_weights_235_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_235_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_235_cast_fp16 = mul(x = attn_weights_233_cast_fp16, y = _inversed_attn_weights_235_y_0_to_fp16)[name = string("_inversed_attn_weights_235_cast_fp16")]; tensor attn_weights_237_cast_fp16 = add(x = _inversed_attn_weights_235_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_237_cast_fp16")]; int32 var_4294 = const()[name = string("op_4294"), val = int32(2)]; tensor attn_weights_239_cast_fp16 = softmax(axis = var_4294, x = attn_weights_237_cast_fp16)[name = string("attn_weights_239_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(true)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_239_cast_fp16, y = var_4272_cast_fp16_1)[name = string("attn_output_85_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(1)]; bool attn_output_87_interleave_0 = const()[name = string("attn_output_87_interleave_0"), val = bool(false)]; tensor attn_output_87_cast_fp16 = concat(axis = var_4302, interleave = attn_output_87_interleave_0, values = (var_4288_cast_fp16, attn_output_85_cast_fp16))[name = string("attn_output_87_cast_fp16")]; tensor var_4306_perm_0 = const()[name = string("op_4306_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4311 = const()[name = string("op_4311"), val = tensor([1, 1024, 1, 64])]; tensor var_4306_cast_fp16 = transpose(perm = var_4306_perm_0, x = attn_output_87_cast_fp16)[name = string("transpose_27")]; tensor x_257_cast_fp16 = reshape(shape = var_4311, x = var_4306_cast_fp16)[name = string("x_257_cast_fp16")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429531584)))]; tensor hidden_states_87_cast_fp16 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = var_4318_to_fp16, x = x_257_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_251_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(1)]; fp16 const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4333_cast_fp16 = mul(x = x_259_cast_fp16, y = const_153_promoted_to_fp16)[name = string("op_4333_cast_fp16")]; bool x_261_interleave_0 = const()[name = string("x_261_interleave_0"), val = bool(false)]; tensor x_261_cast_fp16 = concat(axis = var_4330, interleave = x_261_interleave_0, values = (x_259_cast_fp16, var_4333_cast_fp16))[name = string("x_261_cast_fp16")]; tensor out_175_axes_0 = const()[name = string("out_175_axes_0"), val = tensor([1])]; fp16 var_4343_to_fp16 = const()[name = string("op_4343_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_175_cast_fp16 = layer_norm(axes = out_175_axes_0, epsilon = var_4343_to_fp16, x = x_261_cast_fp16)[name = string("out_175_cast_fp16")]; tensor layer_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431628800)))]; tensor out_177_cast_fp16 = mul(x = out_175_cast_fp16, y = layer_layers_14_post_attention_layernorm_weight_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_4349_split_sizes_0 = const()[name = string("op_4349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4349_axis_0 = const()[name = string("op_4349_axis_0"), val = int32(1)]; tensor var_4349_cast_fp16_0, tensor var_4349_cast_fp16_1 = split(axis = var_4349_axis_0, split_sizes = var_4349_split_sizes_0, x = out_177_cast_fp16)[name = string("op_4349_cast_fp16")]; string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431632960)))]; tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = var_4354_to_fp16, x = var_4349_cast_fp16_0)[name = string("input_29_cast_fp16")]; tensor var_4365_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_4365_cast_fp16")]; string var_4370_pad_type_0 = const()[name = string("op_4370_pad_type_0"), val = string("valid")]; tensor var_4370_strides_0 = const()[name = string("op_4370_strides_0"), val = tensor([1, 1])]; tensor var_4370_pad_0 = const()[name = string("op_4370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4370_dilations_0 = const()[name = string("op_4370_dilations_0"), val = tensor([1, 1])]; int32 var_4370_groups_0 = const()[name = string("op_4370_groups_0"), val = int32(1)]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440021632)))]; tensor var_4370_cast_fp16 = conv(dilations = var_4370_dilations_0, groups = var_4370_groups_0, pad = var_4370_pad_0, pad_type = var_4370_pad_type_0, strides = var_4370_strides_0, weight = var_4353_to_fp16, x = var_4349_cast_fp16_0)[name = string("op_4370_cast_fp16")]; tensor x_267_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4370_cast_fp16)[name = string("x_267_cast_fp16")]; string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; tensor var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448410304)))]; tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = var_4352_to_fp16, x = x_267_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor x_269_cast_fp16 = add(x = x_259_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("x_269_cast_fp16")]; int32 var_4383 = const()[name = string("op_4383"), val = int32(1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool x_271_interleave_0 = const()[name = string("x_271_interleave_0"), val = bool(false)]; tensor x_271_cast_fp16 = concat(axis = var_4383, interleave = x_271_interleave_0, values = (x_269_cast_fp16, var_4386_cast_fp16))[name = string("x_271_cast_fp16")]; tensor out_181_axes_0 = const()[name = string("out_181_axes_0"), val = tensor([1])]; fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_181_cast_fp16 = layer_norm(axes = out_181_axes_0, epsilon = var_4396_to_fp16, x = x_271_cast_fp16)[name = string("out_181_cast_fp16")]; tensor layer_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456798976)))]; tensor out_183_cast_fp16 = mul(x = out_181_cast_fp16, y = layer_layers_15_input_layernorm_weight_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_4402_split_sizes_0 = const()[name = string("op_4402_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4402_axis_0 = const()[name = string("op_4402_axis_0"), val = int32(1)]; tensor var_4402_cast_fp16_0, tensor var_4402_cast_fp16_1 = split(axis = var_4402_axis_0, split_sizes = var_4402_split_sizes_0, x = out_183_cast_fp16)[name = string("op_4402_cast_fp16")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456803136)))]; tensor query_states_61_cast_fp16 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = var_4424_to_fp16, x = var_4402_cast_fp16_0)[name = string("query_states_61_cast_fp16")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458900352)))]; tensor key_states_61_cast_fp16 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = var_4435_to_fp16, x = var_4402_cast_fp16_0)[name = string("key_states_61_cast_fp16")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459162560)))]; tensor value_states_61_cast_fp16 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = var_4446_to_fp16, x = var_4402_cast_fp16_0)[name = string("value_states_61_cast_fp16")]; tensor var_4454 = const()[name = string("op_4454"), val = tensor([1, 16, 64, 64])]; tensor embed_61_cast_fp16 = reshape(shape = var_4454, x = query_states_61_cast_fp16)[name = string("embed_61_cast_fp16")]; tensor var_4458 = const()[name = string("op_4458"), val = tensor([1, 2, 64, 64])]; tensor var_4459_cast_fp16 = reshape(shape = var_4458, x = key_states_61_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor embed_63_perm_0 = const()[name = string("embed_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4465 = const()[name = string("op_4465"), val = tensor([1, 2, 64, 64])]; tensor var_4466_cast_fp16 = reshape(shape = var_4465, x = value_states_61_cast_fp16)[name = string("op_4466_cast_fp16")]; tensor value_states_63_perm_0 = const()[name = string("value_states_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4470_cast_fp16 = mul(x = embed_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4471_split_sizes_0 = const()[name = string("op_4471_split_sizes_0"), val = tensor([32, 32])]; int32 var_4471_axis_0 = const()[name = string("op_4471_axis_0"), val = int32(-2)]; tensor var_4471_cast_fp16_0, tensor var_4471_cast_fp16_1 = split(axis = var_4471_axis_0, split_sizes = var_4471_split_sizes_0, x = embed_61_cast_fp16)[name = string("op_4471_cast_fp16")]; fp16 const_157_promoted_to_fp16 = const()[name = string("const_157_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4473_cast_fp16 = mul(x = var_4471_cast_fp16_1, y = const_157_promoted_to_fp16)[name = string("op_4473_cast_fp16")]; int32 var_4475 = const()[name = string("op_4475"), val = int32(-2)]; bool var_4476_interleave_0 = const()[name = string("op_4476_interleave_0"), val = bool(false)]; tensor var_4476_cast_fp16 = concat(axis = var_4475, interleave = var_4476_interleave_0, values = (var_4473_cast_fp16, var_4471_cast_fp16_0))[name = string("op_4476_cast_fp16")]; tensor var_4477_cast_fp16 = mul(x = var_4476_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4477_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_4470_cast_fp16, y = var_4477_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor embed_63_cast_fp16 = transpose(perm = embed_63_perm_0, x = var_4459_cast_fp16)[name = string("transpose_26")]; tensor var_4480_cast_fp16 = mul(x = embed_63_cast_fp16, y = cos_cast_fp16)[name = string("op_4480_cast_fp16")]; tensor var_4481_split_sizes_0 = const()[name = string("op_4481_split_sizes_0"), val = tensor([32, 32])]; int32 var_4481_axis_0 = const()[name = string("op_4481_axis_0"), val = int32(-1)]; tensor var_4481_cast_fp16_0, tensor var_4481_cast_fp16_1 = split(axis = var_4481_axis_0, split_sizes = var_4481_split_sizes_0, x = embed_63_cast_fp16)[name = string("op_4481_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4483_cast_fp16 = mul(x = var_4481_cast_fp16_1, y = const_158_promoted_to_fp16)[name = string("op_4483_cast_fp16")]; int32 var_4485 = const()[name = string("op_4485"), val = int32(-1)]; bool var_4486_interleave_0 = const()[name = string("op_4486_interleave_0"), val = bool(false)]; tensor var_4486_cast_fp16 = concat(axis = var_4485, interleave = var_4486_interleave_0, values = (var_4483_cast_fp16, var_4481_cast_fp16_0))[name = string("op_4486_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = var_4486_cast_fp16, y = sin_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor key_states_63_cast_fp16 = add(x = var_4480_cast_fp16, y = var_4487_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([15])]; tensor expand_dims_152 = const()[name = string("expand_dims_152"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([16])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_151, expand_dims_152, position_id, concat_123_values3_0))[name = string("concat_123")]; tensor concat_124_values1_0 = const()[name = string("concat_124_values1_0"), val = tensor([0])]; tensor concat_124_values3_0 = const()[name = string("concat_124_values3_0"), val = tensor([0])]; int32 concat_124_axis_0 = const()[name = string("concat_124_axis_0"), val = int32(0)]; bool concat_124_interleave_0 = const()[name = string("concat_124_interleave_0"), val = bool(false)]; tensor concat_124 = concat(axis = concat_124_axis_0, interleave = concat_124_interleave_0, values = (expand_dims_154, concat_124_values1_0, var_426, concat_124_values3_0))[name = string("concat_124")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_388_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_388")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63_cast_fp16 = transpose(perm = value_states_63_perm_0, x = var_4466_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = value_states_63_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_389_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_389")]; tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_78)[name = string("op_4530_cast_fp16")]; tensor tile_30 = const()[name = string("tile_30"), val = tensor([1, 1])]; int32 var_4533_axis_0 = const()[name = string("op_4533_axis_0"), val = int32(1)]; tensor var_4533_cast_fp16_0, tensor var_4533_cast_fp16_1 = split(axis = var_4533_axis_0, split_sizes = tile_30, x = var_4530_cast_fp16)[name = string("op_4533_cast_fp16")]; tensor var_4540_begin_0 = const()[name = string("op_4540_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4540_end_0 = const()[name = string("op_4540_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4540_end_mask_0 = const()[name = string("op_4540_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4540_cast_fp16 = slice_by_index(begin = var_4540_begin_0, end = var_4540_end_0, end_mask = var_4540_end_mask_0, x = coreml_update_state_79)[name = string("op_4540_cast_fp16")]; tensor tile_31 = const()[name = string("tile_31"), val = tensor([1, 1])]; int32 var_4543_axis_0 = const()[name = string("op_4543_axis_0"), val = int32(1)]; tensor var_4543_cast_fp16_0, tensor var_4543_cast_fp16_1 = split(axis = var_4543_axis_0, split_sizes = tile_31, x = var_4540_cast_fp16)[name = string("op_4543_cast_fp16")]; tensor var_4546_split_sizes_0 = const()[name = string("op_4546_split_sizes_0"), val = tensor([8, 8])]; int32 var_4546_axis_0 = const()[name = string("op_4546_axis_0"), val = int32(1)]; tensor var_4546_cast_fp16_0, tensor var_4546_cast_fp16_1 = split(axis = var_4546_axis_0, split_sizes = var_4546_split_sizes_0, x = query_states_63_cast_fp16)[name = string("op_4546_cast_fp16")]; bool attn_weights_241_transpose_x_0 = const()[name = string("attn_weights_241_transpose_x_0"), val = bool(false)]; bool attn_weights_241_transpose_y_0 = const()[name = string("attn_weights_241_transpose_y_0"), val = bool(false)]; tensor attn_weights_241_cast_fp16 = matmul(transpose_x = attn_weights_241_transpose_x_0, transpose_y = attn_weights_241_transpose_y_0, x = var_4533_cast_fp16_0, y = var_4546_cast_fp16_0)[name = string("attn_weights_241_cast_fp16")]; fp16 _inversed_attn_weights_243_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_243_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_243_cast_fp16 = mul(x = attn_weights_241_cast_fp16, y = _inversed_attn_weights_243_y_0_to_fp16)[name = string("_inversed_attn_weights_243_cast_fp16")]; tensor attn_weights_245_cast_fp16 = add(x = _inversed_attn_weights_243_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_245_cast_fp16")]; int32 var_4553 = const()[name = string("op_4553"), val = int32(2)]; tensor attn_weights_247_cast_fp16 = softmax(axis = var_4553, x = attn_weights_245_cast_fp16)[name = string("attn_weights_247_cast_fp16")]; bool var_4559_transpose_x_1 = const()[name = string("op_4559_transpose_x_1"), val = bool(true)]; bool var_4559_transpose_y_1 = const()[name = string("op_4559_transpose_y_1"), val = bool(false)]; tensor var_4559_cast_fp16 = matmul(transpose_x = var_4559_transpose_x_1, transpose_y = var_4559_transpose_y_1, x = attn_weights_247_cast_fp16, y = var_4543_cast_fp16_0)[name = string("op_4559_cast_fp16")]; bool attn_weights_249_transpose_x_0 = const()[name = string("attn_weights_249_transpose_x_0"), val = bool(false)]; bool attn_weights_249_transpose_y_0 = const()[name = string("attn_weights_249_transpose_y_0"), val = bool(false)]; tensor attn_weights_249_cast_fp16 = matmul(transpose_x = attn_weights_249_transpose_x_0, transpose_y = attn_weights_249_transpose_y_0, x = var_4533_cast_fp16_1, y = var_4546_cast_fp16_1)[name = string("attn_weights_249_cast_fp16")]; fp16 _inversed_attn_weights_251_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_251_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_251_cast_fp16 = mul(x = attn_weights_249_cast_fp16, y = _inversed_attn_weights_251_y_0_to_fp16)[name = string("_inversed_attn_weights_251_cast_fp16")]; tensor attn_weights_253_cast_fp16 = add(x = _inversed_attn_weights_251_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_253_cast_fp16")]; int32 var_4565 = const()[name = string("op_4565"), val = int32(2)]; tensor attn_weights_255_cast_fp16 = softmax(axis = var_4565, x = attn_weights_253_cast_fp16)[name = string("attn_weights_255_cast_fp16")]; bool attn_output_91_transpose_x_1 = const()[name = string("attn_output_91_transpose_x_1"), val = bool(true)]; bool attn_output_91_transpose_y_1 = const()[name = string("attn_output_91_transpose_y_1"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_1, transpose_y = attn_output_91_transpose_y_1, x = attn_weights_255_cast_fp16, y = var_4543_cast_fp16_1)[name = string("attn_output_91_cast_fp16")]; int32 var_4573 = const()[name = string("op_4573"), val = int32(1)]; bool attn_output_93_interleave_0 = const()[name = string("attn_output_93_interleave_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = concat(axis = var_4573, interleave = attn_output_93_interleave_0, values = (var_4559_cast_fp16, attn_output_91_cast_fp16))[name = string("attn_output_93_cast_fp16")]; tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 1024, 1, 64])]; tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; tensor x_275_cast_fp16 = reshape(shape = var_4582, x = var_4577_cast_fp16)[name = string("x_275_cast_fp16")]; string hidden_states_93_pad_type_0 = const()[name = string("hidden_states_93_pad_type_0"), val = string("valid")]; tensor hidden_states_93_strides_0 = const()[name = string("hidden_states_93_strides_0"), val = tensor([1, 1])]; tensor hidden_states_93_pad_0 = const()[name = string("hidden_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_93_dilations_0 = const()[name = string("hidden_states_93_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_93_groups_0 = const()[name = string("hidden_states_93_groups_0"), val = int32(1)]; tensor var_4589_to_fp16 = const()[name = string("op_4589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459424768)))]; tensor hidden_states_93_cast_fp16 = conv(dilations = hidden_states_93_dilations_0, groups = hidden_states_93_groups_0, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = hidden_states_93_strides_0, weight = var_4589_to_fp16, x = x_275_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_269_cast_fp16, y = hidden_states_93_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_4601 = const()[name = string("op_4601"), val = int32(1)]; fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4604_cast_fp16 = mul(x = x_277_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_4604_cast_fp16")]; bool x_279_interleave_0 = const()[name = string("x_279_interleave_0"), val = bool(false)]; tensor x_279_cast_fp16 = concat(axis = var_4601, interleave = x_279_interleave_0, values = (x_277_cast_fp16, var_4604_cast_fp16))[name = string("x_279_cast_fp16")]; tensor out_187_axes_0 = const()[name = string("out_187_axes_0"), val = tensor([1])]; fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_187_cast_fp16 = layer_norm(axes = out_187_axes_0, epsilon = var_4614_to_fp16, x = x_279_cast_fp16)[name = string("out_187_cast_fp16")]; tensor layer_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461521984)))]; tensor out_189_cast_fp16 = mul(x = out_187_cast_fp16, y = layer_layers_15_post_attention_layernorm_weight_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_4620_split_sizes_0 = const()[name = string("op_4620_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4620_axis_0 = const()[name = string("op_4620_axis_0"), val = int32(1)]; tensor var_4620_cast_fp16_0, tensor var_4620_cast_fp16_1 = split(axis = var_4620_axis_0, split_sizes = var_4620_split_sizes_0, x = out_189_cast_fp16)[name = string("op_4620_cast_fp16")]; string input_31_pad_type_0 = const()[name = string("input_31_pad_type_0"), val = string("valid")]; tensor input_31_strides_0 = const()[name = string("input_31_strides_0"), val = tensor([1, 1])]; tensor input_31_pad_0 = const()[name = string("input_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_31_dilations_0 = const()[name = string("input_31_dilations_0"), val = tensor([1, 1])]; int32 input_31_groups_0 = const()[name = string("input_31_groups_0"), val = int32(1)]; tensor var_4625_to_fp16 = const()[name = string("op_4625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461526144)))]; tensor input_31_cast_fp16 = conv(dilations = input_31_dilations_0, groups = input_31_groups_0, pad = input_31_pad_0, pad_type = input_31_pad_type_0, strides = input_31_strides_0, weight = var_4625_to_fp16, x = var_4620_cast_fp16_0)[name = string("input_31_cast_fp16")]; tensor var_4636_cast_fp16 = silu(x = input_31_cast_fp16)[name = string("op_4636_cast_fp16")]; string var_4641_pad_type_0 = const()[name = string("op_4641_pad_type_0"), val = string("valid")]; tensor var_4641_strides_0 = const()[name = string("op_4641_strides_0"), val = tensor([1, 1])]; tensor var_4641_pad_0 = const()[name = string("op_4641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_dilations_0 = const()[name = string("op_4641_dilations_0"), val = tensor([1, 1])]; int32 var_4641_groups_0 = const()[name = string("op_4641_groups_0"), val = int32(1)]; tensor var_4624_to_fp16 = const()[name = string("op_4624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469914816)))]; tensor var_4641_cast_fp16 = conv(dilations = var_4641_dilations_0, groups = var_4641_groups_0, pad = var_4641_pad_0, pad_type = var_4641_pad_type_0, strides = var_4641_strides_0, weight = var_4624_to_fp16, x = var_4620_cast_fp16_0)[name = string("op_4641_cast_fp16")]; tensor x_285_cast_fp16 = mul(x = var_4636_cast_fp16, y = var_4641_cast_fp16)[name = string("x_285_cast_fp16")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478303488)))]; tensor hidden_states_95_cast_fp16 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = var_4623_to_fp16, x = x_285_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor x_287_cast_fp16 = add(x = x_277_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("x_287_cast_fp16")]; int32 var_4654 = const()[name = string("op_4654"), val = int32(1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4657_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4657_cast_fp16")]; bool x_289_interleave_0 = const()[name = string("x_289_interleave_0"), val = bool(false)]; tensor x_289_cast_fp16 = concat(axis = var_4654, interleave = x_289_interleave_0, values = (x_287_cast_fp16, var_4657_cast_fp16))[name = string("x_289_cast_fp16")]; tensor out_193_axes_0 = const()[name = string("out_193_axes_0"), val = tensor([1])]; fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_193_cast_fp16 = layer_norm(axes = out_193_axes_0, epsilon = var_4667_to_fp16, x = x_289_cast_fp16)[name = string("out_193_cast_fp16")]; tensor layer_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486692160)))]; tensor out_195_cast_fp16 = mul(x = out_193_cast_fp16, y = layer_layers_16_input_layernorm_weight_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_4673_split_sizes_0 = const()[name = string("op_4673_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4673_axis_0 = const()[name = string("op_4673_axis_0"), val = int32(1)]; tensor var_4673_cast_fp16_0, tensor var_4673_cast_fp16_1 = split(axis = var_4673_axis_0, split_sizes = var_4673_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4673_cast_fp16")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486696320)))]; tensor query_states_65_cast_fp16 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = var_4695_to_fp16, x = var_4673_cast_fp16_0)[name = string("query_states_65_cast_fp16")]; string key_states_65_pad_type_0 = const()[name = string("key_states_65_pad_type_0"), val = string("valid")]; tensor key_states_65_strides_0 = const()[name = string("key_states_65_strides_0"), val = tensor([1, 1])]; tensor key_states_65_pad_0 = const()[name = string("key_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_65_dilations_0 = const()[name = string("key_states_65_dilations_0"), val = tensor([1, 1])]; int32 key_states_65_groups_0 = const()[name = string("key_states_65_groups_0"), val = int32(1)]; tensor var_4706_to_fp16 = const()[name = string("op_4706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488793536)))]; tensor key_states_65_cast_fp16 = conv(dilations = key_states_65_dilations_0, groups = key_states_65_groups_0, pad = key_states_65_pad_0, pad_type = key_states_65_pad_type_0, strides = key_states_65_strides_0, weight = var_4706_to_fp16, x = var_4673_cast_fp16_0)[name = string("key_states_65_cast_fp16")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489055744)))]; tensor value_states_65_cast_fp16 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = var_4717_to_fp16, x = var_4673_cast_fp16_0)[name = string("value_states_65_cast_fp16")]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 16, 64, 64])]; tensor embed_65_cast_fp16 = reshape(shape = var_4725, x = query_states_65_cast_fp16)[name = string("embed_65_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 2, 64, 64])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = key_states_65_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor embed_67_perm_0 = const()[name = string("embed_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2, 64, 64])]; tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = value_states_65_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor value_states_67_perm_0 = const()[name = string("value_states_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4741_cast_fp16 = mul(x = embed_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4741_cast_fp16")]; tensor var_4742_split_sizes_0 = const()[name = string("op_4742_split_sizes_0"), val = tensor([32, 32])]; int32 var_4742_axis_0 = const()[name = string("op_4742_axis_0"), val = int32(-2)]; tensor var_4742_cast_fp16_0, tensor var_4742_cast_fp16_1 = split(axis = var_4742_axis_0, split_sizes = var_4742_split_sizes_0, x = embed_65_cast_fp16)[name = string("op_4742_cast_fp16")]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4744_cast_fp16 = mul(x = var_4742_cast_fp16_1, y = const_167_promoted_to_fp16)[name = string("op_4744_cast_fp16")]; int32 var_4746 = const()[name = string("op_4746"), val = int32(-2)]; bool var_4747_interleave_0 = const()[name = string("op_4747_interleave_0"), val = bool(false)]; tensor var_4747_cast_fp16 = concat(axis = var_4746, interleave = var_4747_interleave_0, values = (var_4744_cast_fp16, var_4742_cast_fp16_0))[name = string("op_4747_cast_fp16")]; tensor var_4748_cast_fp16 = mul(x = var_4747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_4741_cast_fp16, y = var_4748_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor embed_67_cast_fp16 = transpose(perm = embed_67_perm_0, x = var_4730_cast_fp16)[name = string("transpose_23")]; tensor var_4751_cast_fp16 = mul(x = embed_67_cast_fp16, y = cos_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4752_split_sizes_0 = const()[name = string("op_4752_split_sizes_0"), val = tensor([32, 32])]; int32 var_4752_axis_0 = const()[name = string("op_4752_axis_0"), val = int32(-1)]; tensor var_4752_cast_fp16_0, tensor var_4752_cast_fp16_1 = split(axis = var_4752_axis_0, split_sizes = var_4752_split_sizes_0, x = embed_67_cast_fp16)[name = string("op_4752_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4754_cast_fp16 = mul(x = var_4752_cast_fp16_1, y = const_168_promoted_to_fp16)[name = string("op_4754_cast_fp16")]; int32 var_4756 = const()[name = string("op_4756"), val = int32(-1)]; bool var_4757_interleave_0 = const()[name = string("op_4757_interleave_0"), val = bool(false)]; tensor var_4757_cast_fp16 = concat(axis = var_4756, interleave = var_4757_interleave_0, values = (var_4754_cast_fp16, var_4752_cast_fp16_0))[name = string("op_4757_cast_fp16")]; tensor var_4758_cast_fp16 = mul(x = var_4757_cast_fp16, y = sin_cast_fp16)[name = string("op_4758_cast_fp16")]; tensor key_states_67_cast_fp16 = add(x = var_4751_cast_fp16, y = var_4758_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor expand_dims_161 = const()[name = string("expand_dims_161"), val = tensor([16])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_164 = const()[name = string("expand_dims_164"), val = tensor([17])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_161, expand_dims_162, position_id, concat_131_values3_0))[name = string("concat_131")]; tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_164, concat_132_values1_0, var_426, concat_132_values3_0))[name = string("concat_132")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = key_states_67_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_390_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_390")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67_cast_fp16 = transpose(perm = value_states_67_perm_0, x = var_4737_cast_fp16)[name = string("transpose_22")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = value_states_67_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_391_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_391")]; tensor var_4801_begin_0 = const()[name = string("op_4801_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4801_end_0 = const()[name = string("op_4801_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4801_end_mask_0 = const()[name = string("op_4801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4801_cast_fp16 = slice_by_index(begin = var_4801_begin_0, end = var_4801_end_0, end_mask = var_4801_end_mask_0, x = coreml_update_state_80)[name = string("op_4801_cast_fp16")]; tensor tile_32 = const()[name = string("tile_32"), val = tensor([1, 1])]; int32 var_4804_axis_0 = const()[name = string("op_4804_axis_0"), val = int32(1)]; tensor var_4804_cast_fp16_0, tensor var_4804_cast_fp16_1 = split(axis = var_4804_axis_0, split_sizes = tile_32, x = var_4801_cast_fp16)[name = string("op_4804_cast_fp16")]; tensor var_4811_begin_0 = const()[name = string("op_4811_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4811_end_0 = const()[name = string("op_4811_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4811_end_mask_0 = const()[name = string("op_4811_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4811_cast_fp16 = slice_by_index(begin = var_4811_begin_0, end = var_4811_end_0, end_mask = var_4811_end_mask_0, x = coreml_update_state_81)[name = string("op_4811_cast_fp16")]; tensor tile_33 = const()[name = string("tile_33"), val = tensor([1, 1])]; int32 var_4814_axis_0 = const()[name = string("op_4814_axis_0"), val = int32(1)]; tensor var_4814_cast_fp16_0, tensor var_4814_cast_fp16_1 = split(axis = var_4814_axis_0, split_sizes = tile_33, x = var_4811_cast_fp16)[name = string("op_4814_cast_fp16")]; tensor var_4817_split_sizes_0 = const()[name = string("op_4817_split_sizes_0"), val = tensor([8, 8])]; int32 var_4817_axis_0 = const()[name = string("op_4817_axis_0"), val = int32(1)]; tensor var_4817_cast_fp16_0, tensor var_4817_cast_fp16_1 = split(axis = var_4817_axis_0, split_sizes = var_4817_split_sizes_0, x = query_states_67_cast_fp16)[name = string("op_4817_cast_fp16")]; bool attn_weights_257_transpose_x_0 = const()[name = string("attn_weights_257_transpose_x_0"), val = bool(false)]; bool attn_weights_257_transpose_y_0 = const()[name = string("attn_weights_257_transpose_y_0"), val = bool(false)]; tensor attn_weights_257_cast_fp16 = matmul(transpose_x = attn_weights_257_transpose_x_0, transpose_y = attn_weights_257_transpose_y_0, x = var_4804_cast_fp16_0, y = var_4817_cast_fp16_0)[name = string("attn_weights_257_cast_fp16")]; fp16 _inversed_attn_weights_259_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_259_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_259_cast_fp16 = mul(x = attn_weights_257_cast_fp16, y = _inversed_attn_weights_259_y_0_to_fp16)[name = string("_inversed_attn_weights_259_cast_fp16")]; tensor attn_weights_261_cast_fp16 = add(x = _inversed_attn_weights_259_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_261_cast_fp16")]; int32 var_4824 = const()[name = string("op_4824"), val = int32(2)]; tensor attn_weights_263_cast_fp16 = softmax(axis = var_4824, x = attn_weights_261_cast_fp16)[name = string("attn_weights_263_cast_fp16")]; bool var_4830_transpose_x_1 = const()[name = string("op_4830_transpose_x_1"), val = bool(true)]; bool var_4830_transpose_y_1 = const()[name = string("op_4830_transpose_y_1"), val = bool(false)]; tensor var_4830_cast_fp16 = matmul(transpose_x = var_4830_transpose_x_1, transpose_y = var_4830_transpose_y_1, x = attn_weights_263_cast_fp16, y = var_4814_cast_fp16_0)[name = string("op_4830_cast_fp16")]; bool attn_weights_265_transpose_x_0 = const()[name = string("attn_weights_265_transpose_x_0"), val = bool(false)]; bool attn_weights_265_transpose_y_0 = const()[name = string("attn_weights_265_transpose_y_0"), val = bool(false)]; tensor attn_weights_265_cast_fp16 = matmul(transpose_x = attn_weights_265_transpose_x_0, transpose_y = attn_weights_265_transpose_y_0, x = var_4804_cast_fp16_1, y = var_4817_cast_fp16_1)[name = string("attn_weights_265_cast_fp16")]; fp16 _inversed_attn_weights_267_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_267_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_267_cast_fp16 = mul(x = attn_weights_265_cast_fp16, y = _inversed_attn_weights_267_y_0_to_fp16)[name = string("_inversed_attn_weights_267_cast_fp16")]; tensor attn_weights_269_cast_fp16 = add(x = _inversed_attn_weights_267_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_269_cast_fp16")]; int32 var_4836 = const()[name = string("op_4836"), val = int32(2)]; tensor attn_weights_271_cast_fp16 = softmax(axis = var_4836, x = attn_weights_269_cast_fp16)[name = string("attn_weights_271_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(true)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_271_cast_fp16, y = var_4814_cast_fp16_1)[name = string("attn_output_97_cast_fp16")]; int32 var_4844 = const()[name = string("op_4844"), val = int32(1)]; bool attn_output_99_interleave_0 = const()[name = string("attn_output_99_interleave_0"), val = bool(false)]; tensor attn_output_99_cast_fp16 = concat(axis = var_4844, interleave = attn_output_99_interleave_0, values = (var_4830_cast_fp16, attn_output_97_cast_fp16))[name = string("attn_output_99_cast_fp16")]; tensor var_4848_perm_0 = const()[name = string("op_4848_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 1024, 1, 64])]; tensor var_4848_cast_fp16 = transpose(perm = var_4848_perm_0, x = attn_output_99_cast_fp16)[name = string("transpose_21")]; tensor x_293_cast_fp16 = reshape(shape = var_4853, x = var_4848_cast_fp16)[name = string("x_293_cast_fp16")]; string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489317952)))]; tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = var_4860_to_fp16, x = x_293_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor x_295_cast_fp16 = add(x = x_287_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("x_295_cast_fp16")]; int32 var_4872 = const()[name = string("op_4872"), val = int32(1)]; fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4875_cast_fp16 = mul(x = x_295_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; bool x_297_interleave_0 = const()[name = string("x_297_interleave_0"), val = bool(false)]; tensor x_297_cast_fp16 = concat(axis = var_4872, interleave = x_297_interleave_0, values = (x_295_cast_fp16, var_4875_cast_fp16))[name = string("x_297_cast_fp16")]; tensor out_199_axes_0 = const()[name = string("out_199_axes_0"), val = tensor([1])]; fp16 var_4885_to_fp16 = const()[name = string("op_4885_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_199_cast_fp16 = layer_norm(axes = out_199_axes_0, epsilon = var_4885_to_fp16, x = x_297_cast_fp16)[name = string("out_199_cast_fp16")]; tensor layer_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491415168)))]; tensor out_201_cast_fp16 = mul(x = out_199_cast_fp16, y = layer_layers_16_post_attention_layernorm_weight_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4891_split_sizes_0 = const()[name = string("op_4891_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(1)]; tensor var_4891_cast_fp16_0, tensor var_4891_cast_fp16_1 = split(axis = var_4891_axis_0, split_sizes = var_4891_split_sizes_0, x = out_201_cast_fp16)[name = string("op_4891_cast_fp16")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491419328)))]; tensor input_33_cast_fp16 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = var_4896_to_fp16, x = var_4891_cast_fp16_0)[name = string("input_33_cast_fp16")]; tensor var_4907_cast_fp16 = silu(x = input_33_cast_fp16)[name = string("op_4907_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4895_to_fp16 = const()[name = string("op_4895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499808000)))]; tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = var_4895_to_fp16, x = var_4891_cast_fp16_0)[name = string("op_4912_cast_fp16")]; tensor x_303_cast_fp16 = mul(x = var_4907_cast_fp16, y = var_4912_cast_fp16)[name = string("x_303_cast_fp16")]; string hidden_states_101_pad_type_0 = const()[name = string("hidden_states_101_pad_type_0"), val = string("valid")]; tensor hidden_states_101_strides_0 = const()[name = string("hidden_states_101_strides_0"), val = tensor([1, 1])]; tensor hidden_states_101_pad_0 = const()[name = string("hidden_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_101_dilations_0 = const()[name = string("hidden_states_101_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_101_groups_0 = const()[name = string("hidden_states_101_groups_0"), val = int32(1)]; tensor var_4894_to_fp16 = const()[name = string("op_4894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508196672)))]; tensor hidden_states_101_cast_fp16 = conv(dilations = hidden_states_101_dilations_0, groups = hidden_states_101_groups_0, pad = hidden_states_101_pad_0, pad_type = hidden_states_101_pad_type_0, strides = hidden_states_101_strides_0, weight = var_4894_to_fp16, x = x_303_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_295_cast_fp16, y = hidden_states_101_cast_fp16)[name = string("x_305_cast_fp16")]; int32 var_4925 = const()[name = string("op_4925"), val = int32(1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4928_cast_fp16 = mul(x = x_305_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_4928_cast_fp16")]; bool x_307_interleave_0 = const()[name = string("x_307_interleave_0"), val = bool(false)]; tensor x_307_cast_fp16 = concat(axis = var_4925, interleave = x_307_interleave_0, values = (x_305_cast_fp16, var_4928_cast_fp16))[name = string("x_307_cast_fp16")]; tensor out_205_axes_0 = const()[name = string("out_205_axes_0"), val = tensor([1])]; fp16 var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_205_cast_fp16 = layer_norm(axes = out_205_axes_0, epsilon = var_4938_to_fp16, x = x_307_cast_fp16)[name = string("out_205_cast_fp16")]; tensor layer_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516585344)))]; tensor out_207_cast_fp16 = mul(x = out_205_cast_fp16, y = layer_layers_17_input_layernorm_weight_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_4944_split_sizes_0 = const()[name = string("op_4944_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4944_axis_0 = const()[name = string("op_4944_axis_0"), val = int32(1)]; tensor var_4944_cast_fp16_0, tensor var_4944_cast_fp16_1 = split(axis = var_4944_axis_0, split_sizes = var_4944_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4944_cast_fp16")]; string query_states_69_pad_type_0 = const()[name = string("query_states_69_pad_type_0"), val = string("valid")]; tensor query_states_69_strides_0 = const()[name = string("query_states_69_strides_0"), val = tensor([1, 1])]; tensor query_states_69_pad_0 = const()[name = string("query_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_69_dilations_0 = const()[name = string("query_states_69_dilations_0"), val = tensor([1, 1])]; int32 query_states_69_groups_0 = const()[name = string("query_states_69_groups_0"), val = int32(1)]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516589504)))]; tensor query_states_69_cast_fp16 = conv(dilations = query_states_69_dilations_0, groups = query_states_69_groups_0, pad = query_states_69_pad_0, pad_type = query_states_69_pad_type_0, strides = query_states_69_strides_0, weight = var_4966_to_fp16, x = var_4944_cast_fp16_0)[name = string("query_states_69_cast_fp16")]; string key_states_69_pad_type_0 = const()[name = string("key_states_69_pad_type_0"), val = string("valid")]; tensor key_states_69_strides_0 = const()[name = string("key_states_69_strides_0"), val = tensor([1, 1])]; tensor key_states_69_pad_0 = const()[name = string("key_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_69_dilations_0 = const()[name = string("key_states_69_dilations_0"), val = tensor([1, 1])]; int32 key_states_69_groups_0 = const()[name = string("key_states_69_groups_0"), val = int32(1)]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518686720)))]; tensor key_states_69_cast_fp16 = conv(dilations = key_states_69_dilations_0, groups = key_states_69_groups_0, pad = key_states_69_pad_0, pad_type = key_states_69_pad_type_0, strides = key_states_69_strides_0, weight = var_4977_to_fp16, x = var_4944_cast_fp16_0)[name = string("key_states_69_cast_fp16")]; string value_states_69_pad_type_0 = const()[name = string("value_states_69_pad_type_0"), val = string("valid")]; tensor value_states_69_strides_0 = const()[name = string("value_states_69_strides_0"), val = tensor([1, 1])]; tensor value_states_69_pad_0 = const()[name = string("value_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_69_dilations_0 = const()[name = string("value_states_69_dilations_0"), val = tensor([1, 1])]; int32 value_states_69_groups_0 = const()[name = string("value_states_69_groups_0"), val = int32(1)]; tensor var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518948928)))]; tensor value_states_69_cast_fp16 = conv(dilations = value_states_69_dilations_0, groups = value_states_69_groups_0, pad = value_states_69_pad_0, pad_type = value_states_69_pad_type_0, strides = value_states_69_strides_0, weight = var_4988_to_fp16, x = var_4944_cast_fp16_0)[name = string("value_states_69_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 16, 64, 64])]; tensor embed_69_cast_fp16 = reshape(shape = var_4996, x = query_states_69_cast_fp16)[name = string("embed_69_cast_fp16")]; tensor var_5000 = const()[name = string("op_5000"), val = tensor([1, 2, 64, 64])]; tensor var_5001_cast_fp16 = reshape(shape = var_5000, x = key_states_69_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor embed_71_perm_0 = const()[name = string("embed_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 2, 64, 64])]; tensor var_5008_cast_fp16 = reshape(shape = var_5007, x = value_states_69_cast_fp16)[name = string("op_5008_cast_fp16")]; tensor value_states_71_perm_0 = const()[name = string("value_states_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5012_cast_fp16 = mul(x = embed_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor var_5013_split_sizes_0 = const()[name = string("op_5013_split_sizes_0"), val = tensor([32, 32])]; int32 var_5013_axis_0 = const()[name = string("op_5013_axis_0"), val = int32(-2)]; tensor var_5013_cast_fp16_0, tensor var_5013_cast_fp16_1 = split(axis = var_5013_axis_0, split_sizes = var_5013_split_sizes_0, x = embed_69_cast_fp16)[name = string("op_5013_cast_fp16")]; fp16 const_177_promoted_to_fp16 = const()[name = string("const_177_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5015_cast_fp16 = mul(x = var_5013_cast_fp16_1, y = const_177_promoted_to_fp16)[name = string("op_5015_cast_fp16")]; int32 var_5017 = const()[name = string("op_5017"), val = int32(-2)]; bool var_5018_interleave_0 = const()[name = string("op_5018_interleave_0"), val = bool(false)]; tensor var_5018_cast_fp16 = concat(axis = var_5017, interleave = var_5018_interleave_0, values = (var_5015_cast_fp16, var_5013_cast_fp16_0))[name = string("op_5018_cast_fp16")]; tensor var_5019_cast_fp16 = mul(x = var_5018_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_5012_cast_fp16, y = var_5019_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor embed_71_cast_fp16 = transpose(perm = embed_71_perm_0, x = var_5001_cast_fp16)[name = string("transpose_20")]; tensor var_5022_cast_fp16 = mul(x = embed_71_cast_fp16, y = cos_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([32, 32])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_cast_fp16_0, tensor var_5023_cast_fp16_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = embed_71_cast_fp16)[name = string("op_5023_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5025_cast_fp16 = mul(x = var_5023_cast_fp16_1, y = const_178_promoted_to_fp16)[name = string("op_5025_cast_fp16")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028_cast_fp16 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025_cast_fp16, var_5023_cast_fp16_0))[name = string("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = mul(x = var_5028_cast_fp16, y = sin_cast_fp16)[name = string("op_5029_cast_fp16")]; tensor key_states_71_cast_fp16 = add(x = var_5022_cast_fp16, y = var_5029_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([17])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([0])]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([18])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_171, expand_dims_172, position_id, concat_139_values3_0))[name = string("concat_139")]; tensor concat_140_values1_0 = const()[name = string("concat_140_values1_0"), val = tensor([0])]; tensor concat_140_values3_0 = const()[name = string("concat_140_values3_0"), val = tensor([0])]; int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (expand_dims_174, concat_140_values1_0, var_426, concat_140_values3_0))[name = string("concat_140")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = key_states_71_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_392_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_392")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_71_cast_fp16 = transpose(perm = value_states_71_perm_0, x = var_5008_cast_fp16)[name = string("transpose_19")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = value_states_71_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_393_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_393")]; tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_82)[name = string("op_5072_cast_fp16")]; tensor tile_34 = const()[name = string("tile_34"), val = tensor([1, 1])]; int32 var_5075_axis_0 = const()[name = string("op_5075_axis_0"), val = int32(1)]; tensor var_5075_cast_fp16_0, tensor var_5075_cast_fp16_1 = split(axis = var_5075_axis_0, split_sizes = tile_34, x = var_5072_cast_fp16)[name = string("op_5075_cast_fp16")]; tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = coreml_update_state_83)[name = string("op_5082_cast_fp16")]; tensor tile_35 = const()[name = string("tile_35"), val = tensor([1, 1])]; int32 var_5085_axis_0 = const()[name = string("op_5085_axis_0"), val = int32(1)]; tensor var_5085_cast_fp16_0, tensor var_5085_cast_fp16_1 = split(axis = var_5085_axis_0, split_sizes = tile_35, x = var_5082_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor var_5088_split_sizes_0 = const()[name = string("op_5088_split_sizes_0"), val = tensor([8, 8])]; int32 var_5088_axis_0 = const()[name = string("op_5088_axis_0"), val = int32(1)]; tensor var_5088_cast_fp16_0, tensor var_5088_cast_fp16_1 = split(axis = var_5088_axis_0, split_sizes = var_5088_split_sizes_0, x = query_states_71_cast_fp16)[name = string("op_5088_cast_fp16")]; bool attn_weights_273_transpose_x_0 = const()[name = string("attn_weights_273_transpose_x_0"), val = bool(false)]; bool attn_weights_273_transpose_y_0 = const()[name = string("attn_weights_273_transpose_y_0"), val = bool(false)]; tensor attn_weights_273_cast_fp16 = matmul(transpose_x = attn_weights_273_transpose_x_0, transpose_y = attn_weights_273_transpose_y_0, x = var_5075_cast_fp16_0, y = var_5088_cast_fp16_0)[name = string("attn_weights_273_cast_fp16")]; fp16 _inversed_attn_weights_275_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_275_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_275_cast_fp16 = mul(x = attn_weights_273_cast_fp16, y = _inversed_attn_weights_275_y_0_to_fp16)[name = string("_inversed_attn_weights_275_cast_fp16")]; tensor attn_weights_277_cast_fp16 = add(x = _inversed_attn_weights_275_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_277_cast_fp16")]; int32 var_5095 = const()[name = string("op_5095"), val = int32(2)]; tensor attn_weights_279_cast_fp16 = softmax(axis = var_5095, x = attn_weights_277_cast_fp16)[name = string("attn_weights_279_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(true)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(false)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = attn_weights_279_cast_fp16, y = var_5085_cast_fp16_0)[name = string("op_5101_cast_fp16")]; bool attn_weights_281_transpose_x_0 = const()[name = string("attn_weights_281_transpose_x_0"), val = bool(false)]; bool attn_weights_281_transpose_y_0 = const()[name = string("attn_weights_281_transpose_y_0"), val = bool(false)]; tensor attn_weights_281_cast_fp16 = matmul(transpose_x = attn_weights_281_transpose_x_0, transpose_y = attn_weights_281_transpose_y_0, x = var_5075_cast_fp16_1, y = var_5088_cast_fp16_1)[name = string("attn_weights_281_cast_fp16")]; fp16 _inversed_attn_weights_283_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_283_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_283_cast_fp16 = mul(x = attn_weights_281_cast_fp16, y = _inversed_attn_weights_283_y_0_to_fp16)[name = string("_inversed_attn_weights_283_cast_fp16")]; tensor attn_weights_285_cast_fp16 = add(x = _inversed_attn_weights_283_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_285_cast_fp16")]; int32 var_5107 = const()[name = string("op_5107"), val = int32(2)]; tensor attn_weights_287_cast_fp16 = softmax(axis = var_5107, x = attn_weights_285_cast_fp16)[name = string("attn_weights_287_cast_fp16")]; bool attn_output_103_transpose_x_1 = const()[name = string("attn_output_103_transpose_x_1"), val = bool(true)]; bool attn_output_103_transpose_y_1 = const()[name = string("attn_output_103_transpose_y_1"), val = bool(false)]; tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_1, transpose_y = attn_output_103_transpose_y_1, x = attn_weights_287_cast_fp16, y = var_5085_cast_fp16_1)[name = string("attn_output_103_cast_fp16")]; int32 var_5115 = const()[name = string("op_5115"), val = int32(1)]; bool attn_output_105_interleave_0 = const()[name = string("attn_output_105_interleave_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = concat(axis = var_5115, interleave = attn_output_105_interleave_0, values = (var_5101_cast_fp16, attn_output_103_cast_fp16))[name = string("attn_output_105_cast_fp16")]; tensor var_5119_perm_0 = const()[name = string("op_5119_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 1024, 1, 64])]; tensor var_5119_cast_fp16 = transpose(perm = var_5119_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_18")]; tensor x_311_cast_fp16 = reshape(shape = var_5124, x = var_5119_cast_fp16)[name = string("x_311_cast_fp16")]; string hidden_states_105_pad_type_0 = const()[name = string("hidden_states_105_pad_type_0"), val = string("valid")]; tensor hidden_states_105_strides_0 = const()[name = string("hidden_states_105_strides_0"), val = tensor([1, 1])]; tensor hidden_states_105_pad_0 = const()[name = string("hidden_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_105_dilations_0 = const()[name = string("hidden_states_105_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_105_groups_0 = const()[name = string("hidden_states_105_groups_0"), val = int32(1)]; tensor var_5131_to_fp16 = const()[name = string("op_5131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519211136)))]; tensor hidden_states_105_cast_fp16 = conv(dilations = hidden_states_105_dilations_0, groups = hidden_states_105_groups_0, pad = hidden_states_105_pad_0, pad_type = hidden_states_105_pad_type_0, strides = hidden_states_105_strides_0, weight = var_5131_to_fp16, x = x_311_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor x_313_cast_fp16 = add(x = x_305_cast_fp16, y = hidden_states_105_cast_fp16)[name = string("x_313_cast_fp16")]; int32 var_5143 = const()[name = string("op_5143"), val = int32(1)]; fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5146_cast_fp16 = mul(x = x_313_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_5146_cast_fp16")]; bool x_315_interleave_0 = const()[name = string("x_315_interleave_0"), val = bool(false)]; tensor x_315_cast_fp16 = concat(axis = var_5143, interleave = x_315_interleave_0, values = (x_313_cast_fp16, var_5146_cast_fp16))[name = string("x_315_cast_fp16")]; tensor out_211_axes_0 = const()[name = string("out_211_axes_0"), val = tensor([1])]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_211_cast_fp16 = layer_norm(axes = out_211_axes_0, epsilon = var_5156_to_fp16, x = x_315_cast_fp16)[name = string("out_211_cast_fp16")]; tensor layer_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521308352)))]; tensor out_213_cast_fp16 = mul(x = out_211_cast_fp16, y = layer_layers_17_post_attention_layernorm_weight_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_5162_split_sizes_0 = const()[name = string("op_5162_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5162_axis_0 = const()[name = string("op_5162_axis_0"), val = int32(1)]; tensor var_5162_cast_fp16_0, tensor var_5162_cast_fp16_1 = split(axis = var_5162_axis_0, split_sizes = var_5162_split_sizes_0, x = out_213_cast_fp16)[name = string("op_5162_cast_fp16")]; string input_35_pad_type_0 = const()[name = string("input_35_pad_type_0"), val = string("valid")]; tensor input_35_strides_0 = const()[name = string("input_35_strides_0"), val = tensor([1, 1])]; tensor input_35_pad_0 = const()[name = string("input_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_35_dilations_0 = const()[name = string("input_35_dilations_0"), val = tensor([1, 1])]; int32 input_35_groups_0 = const()[name = string("input_35_groups_0"), val = int32(1)]; tensor var_5167_to_fp16 = const()[name = string("op_5167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521312512)))]; tensor input_35_cast_fp16 = conv(dilations = input_35_dilations_0, groups = input_35_groups_0, pad = input_35_pad_0, pad_type = input_35_pad_type_0, strides = input_35_strides_0, weight = var_5167_to_fp16, x = var_5162_cast_fp16_0)[name = string("input_35_cast_fp16")]; tensor var_5178_cast_fp16 = silu(x = input_35_cast_fp16)[name = string("op_5178_cast_fp16")]; string var_5183_pad_type_0 = const()[name = string("op_5183_pad_type_0"), val = string("valid")]; tensor var_5183_strides_0 = const()[name = string("op_5183_strides_0"), val = tensor([1, 1])]; tensor var_5183_pad_0 = const()[name = string("op_5183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5183_dilations_0 = const()[name = string("op_5183_dilations_0"), val = tensor([1, 1])]; int32 var_5183_groups_0 = const()[name = string("op_5183_groups_0"), val = int32(1)]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529701184)))]; tensor var_5183_cast_fp16 = conv(dilations = var_5183_dilations_0, groups = var_5183_groups_0, pad = var_5183_pad_0, pad_type = var_5183_pad_type_0, strides = var_5183_strides_0, weight = var_5166_to_fp16, x = var_5162_cast_fp16_0)[name = string("op_5183_cast_fp16")]; tensor x_321_cast_fp16 = mul(x = var_5178_cast_fp16, y = var_5183_cast_fp16)[name = string("x_321_cast_fp16")]; string hidden_states_107_pad_type_0 = const()[name = string("hidden_states_107_pad_type_0"), val = string("valid")]; tensor hidden_states_107_strides_0 = const()[name = string("hidden_states_107_strides_0"), val = tensor([1, 1])]; tensor hidden_states_107_pad_0 = const()[name = string("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_107_dilations_0 = const()[name = string("hidden_states_107_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_107_groups_0 = const()[name = string("hidden_states_107_groups_0"), val = int32(1)]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538089856)))]; tensor hidden_states_107_cast_fp16 = conv(dilations = hidden_states_107_dilations_0, groups = hidden_states_107_groups_0, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = hidden_states_107_strides_0, weight = var_5165_to_fp16, x = x_321_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor x_323_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_107_cast_fp16)[name = string("x_323_cast_fp16")]; int32 var_5196 = const()[name = string("op_5196"), val = int32(1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5199_cast_fp16 = mul(x = x_323_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_5199_cast_fp16")]; bool x_325_interleave_0 = const()[name = string("x_325_interleave_0"), val = bool(false)]; tensor x_325_cast_fp16 = concat(axis = var_5196, interleave = x_325_interleave_0, values = (x_323_cast_fp16, var_5199_cast_fp16))[name = string("x_325_cast_fp16")]; tensor out_217_axes_0 = const()[name = string("out_217_axes_0"), val = tensor([1])]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_217_cast_fp16 = layer_norm(axes = out_217_axes_0, epsilon = var_5209_to_fp16, x = x_325_cast_fp16)[name = string("out_217_cast_fp16")]; tensor layer_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546478528)))]; tensor out_219_cast_fp16 = mul(x = out_217_cast_fp16, y = layer_layers_18_input_layernorm_weight_to_fp16)[name = string("out_219_cast_fp16")]; tensor var_5215_split_sizes_0 = const()[name = string("op_5215_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5215_axis_0 = const()[name = string("op_5215_axis_0"), val = int32(1)]; tensor var_5215_cast_fp16_0, tensor var_5215_cast_fp16_1 = split(axis = var_5215_axis_0, split_sizes = var_5215_split_sizes_0, x = out_219_cast_fp16)[name = string("op_5215_cast_fp16")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546482688)))]; tensor query_states_73_cast_fp16 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = var_5237_to_fp16, x = var_5215_cast_fp16_0)[name = string("query_states_73_cast_fp16")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548579904)))]; tensor key_states_73_cast_fp16 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = var_5248_to_fp16, x = var_5215_cast_fp16_0)[name = string("key_states_73_cast_fp16")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor var_5259_to_fp16 = const()[name = string("op_5259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548842112)))]; tensor value_states_73_cast_fp16 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = var_5259_to_fp16, x = var_5215_cast_fp16_0)[name = string("value_states_73_cast_fp16")]; tensor var_5267 = const()[name = string("op_5267"), val = tensor([1, 16, 64, 64])]; tensor embed_73_cast_fp16 = reshape(shape = var_5267, x = query_states_73_cast_fp16)[name = string("embed_73_cast_fp16")]; tensor var_5271 = const()[name = string("op_5271"), val = tensor([1, 2, 64, 64])]; tensor var_5272_cast_fp16 = reshape(shape = var_5271, x = key_states_73_cast_fp16)[name = string("op_5272_cast_fp16")]; tensor embed_75_perm_0 = const()[name = string("embed_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 64, 64])]; tensor var_5279_cast_fp16 = reshape(shape = var_5278, x = value_states_73_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor value_states_75_perm_0 = const()[name = string("value_states_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5283_cast_fp16 = mul(x = embed_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor var_5284_split_sizes_0 = const()[name = string("op_5284_split_sizes_0"), val = tensor([32, 32])]; int32 var_5284_axis_0 = const()[name = string("op_5284_axis_0"), val = int32(-2)]; tensor var_5284_cast_fp16_0, tensor var_5284_cast_fp16_1 = split(axis = var_5284_axis_0, split_sizes = var_5284_split_sizes_0, x = embed_73_cast_fp16)[name = string("op_5284_cast_fp16")]; fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5286_cast_fp16 = mul(x = var_5284_cast_fp16_1, y = const_187_promoted_to_fp16)[name = string("op_5286_cast_fp16")]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-2)]; bool var_5289_interleave_0 = const()[name = string("op_5289_interleave_0"), val = bool(false)]; tensor var_5289_cast_fp16 = concat(axis = var_5288, interleave = var_5289_interleave_0, values = (var_5286_cast_fp16, var_5284_cast_fp16_0))[name = string("op_5289_cast_fp16")]; tensor var_5290_cast_fp16 = mul(x = var_5289_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5290_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_5283_cast_fp16, y = var_5290_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor embed_75_cast_fp16 = transpose(perm = embed_75_perm_0, x = var_5272_cast_fp16)[name = string("transpose_17")]; tensor var_5293_cast_fp16 = mul(x = embed_75_cast_fp16, y = cos_cast_fp16)[name = string("op_5293_cast_fp16")]; tensor var_5294_split_sizes_0 = const()[name = string("op_5294_split_sizes_0"), val = tensor([32, 32])]; int32 var_5294_axis_0 = const()[name = string("op_5294_axis_0"), val = int32(-1)]; tensor var_5294_cast_fp16_0, tensor var_5294_cast_fp16_1 = split(axis = var_5294_axis_0, split_sizes = var_5294_split_sizes_0, x = embed_75_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5296_cast_fp16 = mul(x = var_5294_cast_fp16_1, y = const_188_promoted_to_fp16)[name = string("op_5296_cast_fp16")]; int32 var_5298 = const()[name = string("op_5298"), val = int32(-1)]; bool var_5299_interleave_0 = const()[name = string("op_5299_interleave_0"), val = bool(false)]; tensor var_5299_cast_fp16 = concat(axis = var_5298, interleave = var_5299_interleave_0, values = (var_5296_cast_fp16, var_5294_cast_fp16_0))[name = string("op_5299_cast_fp16")]; tensor var_5300_cast_fp16 = mul(x = var_5299_cast_fp16, y = sin_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor key_states_75_cast_fp16 = add(x = var_5293_cast_fp16, y = var_5300_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([18])]; tensor expand_dims_182 = const()[name = string("expand_dims_182"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([19])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_181, expand_dims_182, position_id, concat_147_values3_0))[name = string("concat_147")]; tensor concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = tensor([0])]; tensor concat_148_values3_0 = const()[name = string("concat_148_values3_0"), val = tensor([0])]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (expand_dims_184, concat_148_values1_0, var_426, concat_148_values3_0))[name = string("concat_148")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = key_states_75_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_394_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_394")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75_cast_fp16 = transpose(perm = value_states_75_perm_0, x = var_5279_cast_fp16)[name = string("transpose_16")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = value_states_75_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_395_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_395")]; tensor var_5343_begin_0 = const()[name = string("op_5343_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5343_end_0 = const()[name = string("op_5343_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5343_end_mask_0 = const()[name = string("op_5343_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5343_cast_fp16 = slice_by_index(begin = var_5343_begin_0, end = var_5343_end_0, end_mask = var_5343_end_mask_0, x = coreml_update_state_84)[name = string("op_5343_cast_fp16")]; tensor tile_36 = const()[name = string("tile_36"), val = tensor([1, 1])]; int32 var_5346_axis_0 = const()[name = string("op_5346_axis_0"), val = int32(1)]; tensor var_5346_cast_fp16_0, tensor var_5346_cast_fp16_1 = split(axis = var_5346_axis_0, split_sizes = tile_36, x = var_5343_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = coreml_update_state_85)[name = string("op_5353_cast_fp16")]; tensor tile_37 = const()[name = string("tile_37"), val = tensor([1, 1])]; int32 var_5356_axis_0 = const()[name = string("op_5356_axis_0"), val = int32(1)]; tensor var_5356_cast_fp16_0, tensor var_5356_cast_fp16_1 = split(axis = var_5356_axis_0, split_sizes = tile_37, x = var_5353_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_split_sizes_0 = const()[name = string("op_5359_split_sizes_0"), val = tensor([8, 8])]; int32 var_5359_axis_0 = const()[name = string("op_5359_axis_0"), val = int32(1)]; tensor var_5359_cast_fp16_0, tensor var_5359_cast_fp16_1 = split(axis = var_5359_axis_0, split_sizes = var_5359_split_sizes_0, x = query_states_75_cast_fp16)[name = string("op_5359_cast_fp16")]; bool attn_weights_289_transpose_x_0 = const()[name = string("attn_weights_289_transpose_x_0"), val = bool(false)]; bool attn_weights_289_transpose_y_0 = const()[name = string("attn_weights_289_transpose_y_0"), val = bool(false)]; tensor attn_weights_289_cast_fp16 = matmul(transpose_x = attn_weights_289_transpose_x_0, transpose_y = attn_weights_289_transpose_y_0, x = var_5346_cast_fp16_0, y = var_5359_cast_fp16_0)[name = string("attn_weights_289_cast_fp16")]; fp16 _inversed_attn_weights_291_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_291_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_291_cast_fp16 = mul(x = attn_weights_289_cast_fp16, y = _inversed_attn_weights_291_y_0_to_fp16)[name = string("_inversed_attn_weights_291_cast_fp16")]; tensor attn_weights_293_cast_fp16 = add(x = _inversed_attn_weights_291_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_293_cast_fp16")]; int32 var_5366 = const()[name = string("op_5366"), val = int32(2)]; tensor attn_weights_295_cast_fp16 = softmax(axis = var_5366, x = attn_weights_293_cast_fp16)[name = string("attn_weights_295_cast_fp16")]; bool var_5372_transpose_x_1 = const()[name = string("op_5372_transpose_x_1"), val = bool(true)]; bool var_5372_transpose_y_1 = const()[name = string("op_5372_transpose_y_1"), val = bool(false)]; tensor var_5372_cast_fp16 = matmul(transpose_x = var_5372_transpose_x_1, transpose_y = var_5372_transpose_y_1, x = attn_weights_295_cast_fp16, y = var_5356_cast_fp16_0)[name = string("op_5372_cast_fp16")]; bool attn_weights_297_transpose_x_0 = const()[name = string("attn_weights_297_transpose_x_0"), val = bool(false)]; bool attn_weights_297_transpose_y_0 = const()[name = string("attn_weights_297_transpose_y_0"), val = bool(false)]; tensor attn_weights_297_cast_fp16 = matmul(transpose_x = attn_weights_297_transpose_x_0, transpose_y = attn_weights_297_transpose_y_0, x = var_5346_cast_fp16_1, y = var_5359_cast_fp16_1)[name = string("attn_weights_297_cast_fp16")]; fp16 _inversed_attn_weights_299_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_299_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_299_cast_fp16 = mul(x = attn_weights_297_cast_fp16, y = _inversed_attn_weights_299_y_0_to_fp16)[name = string("_inversed_attn_weights_299_cast_fp16")]; tensor attn_weights_301_cast_fp16 = add(x = _inversed_attn_weights_299_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_301_cast_fp16")]; int32 var_5378 = const()[name = string("op_5378"), val = int32(2)]; tensor attn_weights_303_cast_fp16 = softmax(axis = var_5378, x = attn_weights_301_cast_fp16)[name = string("attn_weights_303_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(true)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_303_cast_fp16, y = var_5356_cast_fp16_1)[name = string("attn_output_109_cast_fp16")]; int32 var_5386 = const()[name = string("op_5386"), val = int32(1)]; bool attn_output_111_interleave_0 = const()[name = string("attn_output_111_interleave_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = concat(axis = var_5386, interleave = attn_output_111_interleave_0, values = (var_5372_cast_fp16, attn_output_109_cast_fp16))[name = string("attn_output_111_cast_fp16")]; tensor var_5390_perm_0 = const()[name = string("op_5390_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5395 = const()[name = string("op_5395"), val = tensor([1, 1024, 1, 64])]; tensor var_5390_cast_fp16 = transpose(perm = var_5390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_15")]; tensor x_329_cast_fp16 = reshape(shape = var_5395, x = var_5390_cast_fp16)[name = string("x_329_cast_fp16")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor var_5402_to_fp16 = const()[name = string("op_5402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549104320)))]; tensor hidden_states_111_cast_fp16 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = var_5402_to_fp16, x = x_329_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_323_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("x_331_cast_fp16")]; int32 var_5414 = const()[name = string("op_5414"), val = int32(1)]; fp16 const_193_promoted_to_fp16 = const()[name = string("const_193_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5417_cast_fp16 = mul(x = x_331_cast_fp16, y = const_193_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool x_333_interleave_0 = const()[name = string("x_333_interleave_0"), val = bool(false)]; tensor x_333_cast_fp16 = concat(axis = var_5414, interleave = x_333_interleave_0, values = (x_331_cast_fp16, var_5417_cast_fp16))[name = string("x_333_cast_fp16")]; tensor out_223_axes_0 = const()[name = string("out_223_axes_0"), val = tensor([1])]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_223_cast_fp16 = layer_norm(axes = out_223_axes_0, epsilon = var_5427_to_fp16, x = x_333_cast_fp16)[name = string("out_223_cast_fp16")]; tensor layer_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551201536)))]; tensor out_225_cast_fp16 = mul(x = out_223_cast_fp16, y = layer_layers_18_post_attention_layernorm_weight_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(1)]; tensor var_5433_cast_fp16_0, tensor var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = out_225_cast_fp16)[name = string("op_5433_cast_fp16")]; string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551205696)))]; tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = var_5438_to_fp16, x = var_5433_cast_fp16_0)[name = string("input_37_cast_fp16")]; tensor var_5449_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_5449_cast_fp16")]; string var_5454_pad_type_0 = const()[name = string("op_5454_pad_type_0"), val = string("valid")]; tensor var_5454_strides_0 = const()[name = string("op_5454_strides_0"), val = tensor([1, 1])]; tensor var_5454_pad_0 = const()[name = string("op_5454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_dilations_0 = const()[name = string("op_5454_dilations_0"), val = tensor([1, 1])]; int32 var_5454_groups_0 = const()[name = string("op_5454_groups_0"), val = int32(1)]; tensor var_5437_to_fp16 = const()[name = string("op_5437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559594368)))]; tensor var_5454_cast_fp16 = conv(dilations = var_5454_dilations_0, groups = var_5454_groups_0, pad = var_5454_pad_0, pad_type = var_5454_pad_type_0, strides = var_5454_strides_0, weight = var_5437_to_fp16, x = var_5433_cast_fp16_0)[name = string("op_5454_cast_fp16")]; tensor x_339_cast_fp16 = mul(x = var_5449_cast_fp16, y = var_5454_cast_fp16)[name = string("x_339_cast_fp16")]; string hidden_states_113_pad_type_0 = const()[name = string("hidden_states_113_pad_type_0"), val = string("valid")]; tensor hidden_states_113_strides_0 = const()[name = string("hidden_states_113_strides_0"), val = tensor([1, 1])]; tensor hidden_states_113_pad_0 = const()[name = string("hidden_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_113_dilations_0 = const()[name = string("hidden_states_113_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_113_groups_0 = const()[name = string("hidden_states_113_groups_0"), val = int32(1)]; tensor var_5436_to_fp16 = const()[name = string("op_5436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567983040)))]; tensor hidden_states_113_cast_fp16 = conv(dilations = hidden_states_113_dilations_0, groups = hidden_states_113_groups_0, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = hidden_states_113_strides_0, weight = var_5436_to_fp16, x = x_339_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor x_341_cast_fp16 = add(x = x_331_cast_fp16, y = hidden_states_113_cast_fp16)[name = string("x_341_cast_fp16")]; int32 var_5467 = const()[name = string("op_5467"), val = int32(1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5470_cast_fp16 = mul(x = x_341_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_5470_cast_fp16")]; bool x_343_interleave_0 = const()[name = string("x_343_interleave_0"), val = bool(false)]; tensor x_343_cast_fp16 = concat(axis = var_5467, interleave = x_343_interleave_0, values = (x_341_cast_fp16, var_5470_cast_fp16))[name = string("x_343_cast_fp16")]; tensor out_229_axes_0 = const()[name = string("out_229_axes_0"), val = tensor([1])]; fp16 var_5480_to_fp16 = const()[name = string("op_5480_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_229_cast_fp16 = layer_norm(axes = out_229_axes_0, epsilon = var_5480_to_fp16, x = x_343_cast_fp16)[name = string("out_229_cast_fp16")]; tensor layer_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576371712)))]; tensor out_231_cast_fp16 = mul(x = out_229_cast_fp16, y = layer_layers_19_input_layernorm_weight_to_fp16)[name = string("out_231_cast_fp16")]; tensor var_5486_split_sizes_0 = const()[name = string("op_5486_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5486_axis_0 = const()[name = string("op_5486_axis_0"), val = int32(1)]; tensor var_5486_cast_fp16_0, tensor var_5486_cast_fp16_1 = split(axis = var_5486_axis_0, split_sizes = var_5486_split_sizes_0, x = out_231_cast_fp16)[name = string("op_5486_cast_fp16")]; string query_states_77_pad_type_0 = const()[name = string("query_states_77_pad_type_0"), val = string("valid")]; tensor query_states_77_strides_0 = const()[name = string("query_states_77_strides_0"), val = tensor([1, 1])]; tensor query_states_77_pad_0 = const()[name = string("query_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_77_dilations_0 = const()[name = string("query_states_77_dilations_0"), val = tensor([1, 1])]; int32 query_states_77_groups_0 = const()[name = string("query_states_77_groups_0"), val = int32(1)]; tensor var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576375872)))]; tensor query_states_77_cast_fp16 = conv(dilations = query_states_77_dilations_0, groups = query_states_77_groups_0, pad = query_states_77_pad_0, pad_type = query_states_77_pad_type_0, strides = query_states_77_strides_0, weight = var_5508_to_fp16, x = var_5486_cast_fp16_0)[name = string("query_states_77_cast_fp16")]; string key_states_77_pad_type_0 = const()[name = string("key_states_77_pad_type_0"), val = string("valid")]; tensor key_states_77_strides_0 = const()[name = string("key_states_77_strides_0"), val = tensor([1, 1])]; tensor key_states_77_pad_0 = const()[name = string("key_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_77_dilations_0 = const()[name = string("key_states_77_dilations_0"), val = tensor([1, 1])]; int32 key_states_77_groups_0 = const()[name = string("key_states_77_groups_0"), val = int32(1)]; tensor var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578473088)))]; tensor key_states_77_cast_fp16 = conv(dilations = key_states_77_dilations_0, groups = key_states_77_groups_0, pad = key_states_77_pad_0, pad_type = key_states_77_pad_type_0, strides = key_states_77_strides_0, weight = var_5519_to_fp16, x = var_5486_cast_fp16_0)[name = string("key_states_77_cast_fp16")]; string value_states_77_pad_type_0 = const()[name = string("value_states_77_pad_type_0"), val = string("valid")]; tensor value_states_77_strides_0 = const()[name = string("value_states_77_strides_0"), val = tensor([1, 1])]; tensor value_states_77_pad_0 = const()[name = string("value_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_77_dilations_0 = const()[name = string("value_states_77_dilations_0"), val = tensor([1, 1])]; int32 value_states_77_groups_0 = const()[name = string("value_states_77_groups_0"), val = int32(1)]; tensor var_5530_to_fp16 = const()[name = string("op_5530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578735296)))]; tensor value_states_77_cast_fp16 = conv(dilations = value_states_77_dilations_0, groups = value_states_77_groups_0, pad = value_states_77_pad_0, pad_type = value_states_77_pad_type_0, strides = value_states_77_strides_0, weight = var_5530_to_fp16, x = var_5486_cast_fp16_0)[name = string("value_states_77_cast_fp16")]; tensor var_5538 = const()[name = string("op_5538"), val = tensor([1, 16, 64, 64])]; tensor embed_77_cast_fp16 = reshape(shape = var_5538, x = query_states_77_cast_fp16)[name = string("embed_77_cast_fp16")]; tensor var_5542 = const()[name = string("op_5542"), val = tensor([1, 2, 64, 64])]; tensor var_5543_cast_fp16 = reshape(shape = var_5542, x = key_states_77_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor embed_79_perm_0 = const()[name = string("embed_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([1, 2, 64, 64])]; tensor var_5550_cast_fp16 = reshape(shape = var_5549, x = value_states_77_cast_fp16)[name = string("op_5550_cast_fp16")]; tensor value_states_79_perm_0 = const()[name = string("value_states_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5554_cast_fp16 = mul(x = embed_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5554_cast_fp16")]; tensor var_5555_split_sizes_0 = const()[name = string("op_5555_split_sizes_0"), val = tensor([32, 32])]; int32 var_5555_axis_0 = const()[name = string("op_5555_axis_0"), val = int32(-2)]; tensor var_5555_cast_fp16_0, tensor var_5555_cast_fp16_1 = split(axis = var_5555_axis_0, split_sizes = var_5555_split_sizes_0, x = embed_77_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 const_197_promoted_to_fp16 = const()[name = string("const_197_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5557_cast_fp16 = mul(x = var_5555_cast_fp16_1, y = const_197_promoted_to_fp16)[name = string("op_5557_cast_fp16")]; int32 var_5559 = const()[name = string("op_5559"), val = int32(-2)]; bool var_5560_interleave_0 = const()[name = string("op_5560_interleave_0"), val = bool(false)]; tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5557_cast_fp16, var_5555_cast_fp16_0))[name = string("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = mul(x = var_5560_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5561_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_5554_cast_fp16, y = var_5561_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor embed_79_cast_fp16 = transpose(perm = embed_79_perm_0, x = var_5543_cast_fp16)[name = string("transpose_14")]; tensor var_5564_cast_fp16 = mul(x = embed_79_cast_fp16, y = cos_cast_fp16)[name = string("op_5564_cast_fp16")]; tensor var_5565_split_sizes_0 = const()[name = string("op_5565_split_sizes_0"), val = tensor([32, 32])]; int32 var_5565_axis_0 = const()[name = string("op_5565_axis_0"), val = int32(-1)]; tensor var_5565_cast_fp16_0, tensor var_5565_cast_fp16_1 = split(axis = var_5565_axis_0, split_sizes = var_5565_split_sizes_0, x = embed_79_cast_fp16)[name = string("op_5565_cast_fp16")]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5567_cast_fp16 = mul(x = var_5565_cast_fp16_1, y = const_198_promoted_to_fp16)[name = string("op_5567_cast_fp16")]; int32 var_5569 = const()[name = string("op_5569"), val = int32(-1)]; bool var_5570_interleave_0 = const()[name = string("op_5570_interleave_0"), val = bool(false)]; tensor var_5570_cast_fp16 = concat(axis = var_5569, interleave = var_5570_interleave_0, values = (var_5567_cast_fp16, var_5565_cast_fp16_0))[name = string("op_5570_cast_fp16")]; tensor var_5571_cast_fp16 = mul(x = var_5570_cast_fp16, y = sin_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor key_states_79_cast_fp16 = add(x = var_5564_cast_fp16, y = var_5571_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor expand_dims_191 = const()[name = string("expand_dims_191"), val = tensor([19])]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([20])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_191, expand_dims_192, position_id, concat_155_values3_0))[name = string("concat_155")]; tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (expand_dims_194, concat_156_values1_0, var_426, concat_156_values3_0))[name = string("concat_156")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = key_states_79_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_396_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_396")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_79_cast_fp16 = transpose(perm = value_states_79_perm_0, x = var_5550_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = value_states_79_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_397_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_397")]; tensor var_5614_begin_0 = const()[name = string("op_5614_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5614_end_0 = const()[name = string("op_5614_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5614_end_mask_0 = const()[name = string("op_5614_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5614_cast_fp16 = slice_by_index(begin = var_5614_begin_0, end = var_5614_end_0, end_mask = var_5614_end_mask_0, x = coreml_update_state_86)[name = string("op_5614_cast_fp16")]; tensor tile_38 = const()[name = string("tile_38"), val = tensor([1, 1])]; int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(1)]; tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = tile_38, x = var_5614_cast_fp16)[name = string("op_5617_cast_fp16")]; tensor var_5624_begin_0 = const()[name = string("op_5624_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5624_end_0 = const()[name = string("op_5624_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5624_end_mask_0 = const()[name = string("op_5624_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = coreml_update_state_87)[name = string("op_5624_cast_fp16")]; tensor tile_39 = const()[name = string("tile_39"), val = tensor([1, 1])]; int32 var_5627_axis_0 = const()[name = string("op_5627_axis_0"), val = int32(1)]; tensor var_5627_cast_fp16_0, tensor var_5627_cast_fp16_1 = split(axis = var_5627_axis_0, split_sizes = tile_39, x = var_5624_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5630_split_sizes_0 = const()[name = string("op_5630_split_sizes_0"), val = tensor([8, 8])]; int32 var_5630_axis_0 = const()[name = string("op_5630_axis_0"), val = int32(1)]; tensor var_5630_cast_fp16_0, tensor var_5630_cast_fp16_1 = split(axis = var_5630_axis_0, split_sizes = var_5630_split_sizes_0, x = query_states_79_cast_fp16)[name = string("op_5630_cast_fp16")]; bool attn_weights_305_transpose_x_0 = const()[name = string("attn_weights_305_transpose_x_0"), val = bool(false)]; bool attn_weights_305_transpose_y_0 = const()[name = string("attn_weights_305_transpose_y_0"), val = bool(false)]; tensor attn_weights_305_cast_fp16 = matmul(transpose_x = attn_weights_305_transpose_x_0, transpose_y = attn_weights_305_transpose_y_0, x = var_5617_cast_fp16_0, y = var_5630_cast_fp16_0)[name = string("attn_weights_305_cast_fp16")]; fp16 _inversed_attn_weights_307_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_307_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_307_cast_fp16 = mul(x = attn_weights_305_cast_fp16, y = _inversed_attn_weights_307_y_0_to_fp16)[name = string("_inversed_attn_weights_307_cast_fp16")]; tensor attn_weights_309_cast_fp16 = add(x = _inversed_attn_weights_307_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_309_cast_fp16")]; int32 var_5637 = const()[name = string("op_5637"), val = int32(2)]; tensor attn_weights_311_cast_fp16 = softmax(axis = var_5637, x = attn_weights_309_cast_fp16)[name = string("attn_weights_311_cast_fp16")]; bool var_5643_transpose_x_1 = const()[name = string("op_5643_transpose_x_1"), val = bool(true)]; bool var_5643_transpose_y_1 = const()[name = string("op_5643_transpose_y_1"), val = bool(false)]; tensor var_5643_cast_fp16 = matmul(transpose_x = var_5643_transpose_x_1, transpose_y = var_5643_transpose_y_1, x = attn_weights_311_cast_fp16, y = var_5627_cast_fp16_0)[name = string("op_5643_cast_fp16")]; bool attn_weights_313_transpose_x_0 = const()[name = string("attn_weights_313_transpose_x_0"), val = bool(false)]; bool attn_weights_313_transpose_y_0 = const()[name = string("attn_weights_313_transpose_y_0"), val = bool(false)]; tensor attn_weights_313_cast_fp16 = matmul(transpose_x = attn_weights_313_transpose_x_0, transpose_y = attn_weights_313_transpose_y_0, x = var_5617_cast_fp16_1, y = var_5630_cast_fp16_1)[name = string("attn_weights_313_cast_fp16")]; fp16 _inversed_attn_weights_315_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_315_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_315_cast_fp16 = mul(x = attn_weights_313_cast_fp16, y = _inversed_attn_weights_315_y_0_to_fp16)[name = string("_inversed_attn_weights_315_cast_fp16")]; tensor attn_weights_317_cast_fp16 = add(x = _inversed_attn_weights_315_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_317_cast_fp16")]; int32 var_5649 = const()[name = string("op_5649"), val = int32(2)]; tensor attn_weights_319_cast_fp16 = softmax(axis = var_5649, x = attn_weights_317_cast_fp16)[name = string("attn_weights_319_cast_fp16")]; bool attn_output_115_transpose_x_1 = const()[name = string("attn_output_115_transpose_x_1"), val = bool(true)]; bool attn_output_115_transpose_y_1 = const()[name = string("attn_output_115_transpose_y_1"), val = bool(false)]; tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_1, transpose_y = attn_output_115_transpose_y_1, x = attn_weights_319_cast_fp16, y = var_5627_cast_fp16_1)[name = string("attn_output_115_cast_fp16")]; int32 var_5657 = const()[name = string("op_5657"), val = int32(1)]; bool attn_output_117_interleave_0 = const()[name = string("attn_output_117_interleave_0"), val = bool(false)]; tensor attn_output_117_cast_fp16 = concat(axis = var_5657, interleave = attn_output_117_interleave_0, values = (var_5643_cast_fp16, attn_output_115_cast_fp16))[name = string("attn_output_117_cast_fp16")]; tensor var_5661_perm_0 = const()[name = string("op_5661_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 1024, 1, 64])]; tensor var_5661_cast_fp16 = transpose(perm = var_5661_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_12")]; tensor x_347_cast_fp16 = reshape(shape = var_5666, x = var_5661_cast_fp16)[name = string("x_347_cast_fp16")]; string hidden_states_117_pad_type_0 = const()[name = string("hidden_states_117_pad_type_0"), val = string("valid")]; tensor hidden_states_117_strides_0 = const()[name = string("hidden_states_117_strides_0"), val = tensor([1, 1])]; tensor hidden_states_117_pad_0 = const()[name = string("hidden_states_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_117_dilations_0 = const()[name = string("hidden_states_117_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_117_groups_0 = const()[name = string("hidden_states_117_groups_0"), val = int32(1)]; tensor var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578997504)))]; tensor hidden_states_117_cast_fp16 = conv(dilations = hidden_states_117_dilations_0, groups = hidden_states_117_groups_0, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = hidden_states_117_strides_0, weight = var_5673_to_fp16, x = x_347_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor x_349_cast_fp16 = add(x = x_341_cast_fp16, y = hidden_states_117_cast_fp16)[name = string("x_349_cast_fp16")]; int32 var_5685 = const()[name = string("op_5685"), val = int32(1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5688_cast_fp16 = mul(x = x_349_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5688_cast_fp16")]; bool x_351_interleave_0 = const()[name = string("x_351_interleave_0"), val = bool(false)]; tensor x_351_cast_fp16 = concat(axis = var_5685, interleave = x_351_interleave_0, values = (x_349_cast_fp16, var_5688_cast_fp16))[name = string("x_351_cast_fp16")]; tensor out_235_axes_0 = const()[name = string("out_235_axes_0"), val = tensor([1])]; fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_235_cast_fp16 = layer_norm(axes = out_235_axes_0, epsilon = var_5698_to_fp16, x = x_351_cast_fp16)[name = string("out_235_cast_fp16")]; tensor layer_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581094720)))]; tensor out_237_cast_fp16 = mul(x = out_235_cast_fp16, y = layer_layers_19_post_attention_layernorm_weight_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(1)]; tensor var_5704_cast_fp16_0, tensor var_5704_cast_fp16_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = out_237_cast_fp16)[name = string("op_5704_cast_fp16")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581098880)))]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = var_5709_to_fp16, x = var_5704_cast_fp16_0)[name = string("input_39_cast_fp16")]; tensor var_5720_cast_fp16 = silu(x = input_39_cast_fp16)[name = string("op_5720_cast_fp16")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5708_to_fp16 = const()[name = string("op_5708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589487552)))]; tensor var_5725_cast_fp16 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = var_5708_to_fp16, x = var_5704_cast_fp16_0)[name = string("op_5725_cast_fp16")]; tensor x_357_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5725_cast_fp16)[name = string("x_357_cast_fp16")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor var_5707_to_fp16 = const()[name = string("op_5707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597876224)))]; tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = var_5707_to_fp16, x = x_357_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor x_359_cast_fp16 = add(x = x_349_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("x_359_cast_fp16")]; int32 var_5738 = const()[name = string("op_5738"), val = int32(1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5741_cast_fp16 = mul(x = x_359_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; bool x_361_interleave_0 = const()[name = string("x_361_interleave_0"), val = bool(false)]; tensor x_361_cast_fp16 = concat(axis = var_5738, interleave = x_361_interleave_0, values = (x_359_cast_fp16, var_5741_cast_fp16))[name = string("x_361_cast_fp16")]; tensor out_241_axes_0 = const()[name = string("out_241_axes_0"), val = tensor([1])]; fp16 var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_241_cast_fp16 = layer_norm(axes = out_241_axes_0, epsilon = var_5751_to_fp16, x = x_361_cast_fp16)[name = string("out_241_cast_fp16")]; tensor layer_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606264896)))]; tensor out_243_cast_fp16 = mul(x = out_241_cast_fp16, y = layer_layers_20_input_layernorm_weight_to_fp16)[name = string("out_243_cast_fp16")]; tensor var_5757_split_sizes_0 = const()[name = string("op_5757_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5757_axis_0 = const()[name = string("op_5757_axis_0"), val = int32(1)]; tensor var_5757_cast_fp16_0, tensor var_5757_cast_fp16_1 = split(axis = var_5757_axis_0, split_sizes = var_5757_split_sizes_0, x = out_243_cast_fp16)[name = string("op_5757_cast_fp16")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor var_5779_to_fp16 = const()[name = string("op_5779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606269056)))]; tensor query_states_81_cast_fp16 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = var_5779_to_fp16, x = var_5757_cast_fp16_0)[name = string("query_states_81_cast_fp16")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor var_5790_to_fp16 = const()[name = string("op_5790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608366272)))]; tensor key_states_81_cast_fp16 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = var_5790_to_fp16, x = var_5757_cast_fp16_0)[name = string("key_states_81_cast_fp16")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608628480)))]; tensor value_states_81_cast_fp16 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = var_5801_to_fp16, x = var_5757_cast_fp16_0)[name = string("value_states_81_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 16, 64, 64])]; tensor embed_81_cast_fp16 = reshape(shape = var_5809, x = query_states_81_cast_fp16)[name = string("embed_81_cast_fp16")]; tensor var_5813 = const()[name = string("op_5813"), val = tensor([1, 2, 64, 64])]; tensor var_5814_cast_fp16 = reshape(shape = var_5813, x = key_states_81_cast_fp16)[name = string("op_5814_cast_fp16")]; tensor embed_83_perm_0 = const()[name = string("embed_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, 2, 64, 64])]; tensor var_5821_cast_fp16 = reshape(shape = var_5820, x = value_states_81_cast_fp16)[name = string("op_5821_cast_fp16")]; tensor value_states_83_perm_0 = const()[name = string("value_states_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5825_cast_fp16 = mul(x = embed_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5825_cast_fp16")]; tensor var_5826_split_sizes_0 = const()[name = string("op_5826_split_sizes_0"), val = tensor([32, 32])]; int32 var_5826_axis_0 = const()[name = string("op_5826_axis_0"), val = int32(-2)]; tensor var_5826_cast_fp16_0, tensor var_5826_cast_fp16_1 = split(axis = var_5826_axis_0, split_sizes = var_5826_split_sizes_0, x = embed_81_cast_fp16)[name = string("op_5826_cast_fp16")]; fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5828_cast_fp16 = mul(x = var_5826_cast_fp16_1, y = const_207_promoted_to_fp16)[name = string("op_5828_cast_fp16")]; int32 var_5830 = const()[name = string("op_5830"), val = int32(-2)]; bool var_5831_interleave_0 = const()[name = string("op_5831_interleave_0"), val = bool(false)]; tensor var_5831_cast_fp16 = concat(axis = var_5830, interleave = var_5831_interleave_0, values = (var_5828_cast_fp16, var_5826_cast_fp16_0))[name = string("op_5831_cast_fp16")]; tensor var_5832_cast_fp16 = mul(x = var_5831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5832_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_5825_cast_fp16, y = var_5832_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor embed_83_cast_fp16 = transpose(perm = embed_83_perm_0, x = var_5814_cast_fp16)[name = string("transpose_11")]; tensor var_5835_cast_fp16 = mul(x = embed_83_cast_fp16, y = cos_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5836_split_sizes_0 = const()[name = string("op_5836_split_sizes_0"), val = tensor([32, 32])]; int32 var_5836_axis_0 = const()[name = string("op_5836_axis_0"), val = int32(-1)]; tensor var_5836_cast_fp16_0, tensor var_5836_cast_fp16_1 = split(axis = var_5836_axis_0, split_sizes = var_5836_split_sizes_0, x = embed_83_cast_fp16)[name = string("op_5836_cast_fp16")]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5838_cast_fp16 = mul(x = var_5836_cast_fp16_1, y = const_208_promoted_to_fp16)[name = string("op_5838_cast_fp16")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841_cast_fp16 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838_cast_fp16, var_5836_cast_fp16_0))[name = string("op_5841_cast_fp16")]; tensor var_5842_cast_fp16 = mul(x = var_5841_cast_fp16, y = sin_cast_fp16)[name = string("op_5842_cast_fp16")]; tensor key_states_83_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5842_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([20])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([0])]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([21])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_201, expand_dims_202, position_id, concat_163_values3_0))[name = string("concat_163")]; tensor concat_164_values1_0 = const()[name = string("concat_164_values1_0"), val = tensor([0])]; tensor concat_164_values3_0 = const()[name = string("concat_164_values3_0"), val = tensor([0])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_204, concat_164_values1_0, var_426, concat_164_values3_0))[name = string("concat_164")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_398_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_398")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83_cast_fp16 = transpose(perm = value_states_83_perm_0, x = var_5821_cast_fp16)[name = string("transpose_10")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = value_states_83_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_399_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_399")]; tensor var_5885_begin_0 = const()[name = string("op_5885_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5885_end_0 = const()[name = string("op_5885_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5885_end_mask_0 = const()[name = string("op_5885_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5885_cast_fp16 = slice_by_index(begin = var_5885_begin_0, end = var_5885_end_0, end_mask = var_5885_end_mask_0, x = coreml_update_state_88)[name = string("op_5885_cast_fp16")]; tensor tile_40 = const()[name = string("tile_40"), val = tensor([1, 1])]; int32 var_5888_axis_0 = const()[name = string("op_5888_axis_0"), val = int32(1)]; tensor var_5888_cast_fp16_0, tensor var_5888_cast_fp16_1 = split(axis = var_5888_axis_0, split_sizes = tile_40, x = var_5885_cast_fp16)[name = string("op_5888_cast_fp16")]; tensor var_5895_begin_0 = const()[name = string("op_5895_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5895_end_0 = const()[name = string("op_5895_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5895_end_mask_0 = const()[name = string("op_5895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = coreml_update_state_89)[name = string("op_5895_cast_fp16")]; tensor tile_41 = const()[name = string("tile_41"), val = tensor([1, 1])]; int32 var_5898_axis_0 = const()[name = string("op_5898_axis_0"), val = int32(1)]; tensor var_5898_cast_fp16_0, tensor var_5898_cast_fp16_1 = split(axis = var_5898_axis_0, split_sizes = tile_41, x = var_5895_cast_fp16)[name = string("op_5898_cast_fp16")]; tensor var_5901_split_sizes_0 = const()[name = string("op_5901_split_sizes_0"), val = tensor([8, 8])]; int32 var_5901_axis_0 = const()[name = string("op_5901_axis_0"), val = int32(1)]; tensor var_5901_cast_fp16_0, tensor var_5901_cast_fp16_1 = split(axis = var_5901_axis_0, split_sizes = var_5901_split_sizes_0, x = query_states_83_cast_fp16)[name = string("op_5901_cast_fp16")]; bool attn_weights_321_transpose_x_0 = const()[name = string("attn_weights_321_transpose_x_0"), val = bool(false)]; bool attn_weights_321_transpose_y_0 = const()[name = string("attn_weights_321_transpose_y_0"), val = bool(false)]; tensor attn_weights_321_cast_fp16 = matmul(transpose_x = attn_weights_321_transpose_x_0, transpose_y = attn_weights_321_transpose_y_0, x = var_5888_cast_fp16_0, y = var_5901_cast_fp16_0)[name = string("attn_weights_321_cast_fp16")]; fp16 _inversed_attn_weights_323_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_323_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_323_cast_fp16 = mul(x = attn_weights_321_cast_fp16, y = _inversed_attn_weights_323_y_0_to_fp16)[name = string("_inversed_attn_weights_323_cast_fp16")]; tensor attn_weights_325_cast_fp16 = add(x = _inversed_attn_weights_323_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_325_cast_fp16")]; int32 var_5908 = const()[name = string("op_5908"), val = int32(2)]; tensor attn_weights_327_cast_fp16 = softmax(axis = var_5908, x = attn_weights_325_cast_fp16)[name = string("attn_weights_327_cast_fp16")]; bool var_5914_transpose_x_1 = const()[name = string("op_5914_transpose_x_1"), val = bool(true)]; bool var_5914_transpose_y_1 = const()[name = string("op_5914_transpose_y_1"), val = bool(false)]; tensor var_5914_cast_fp16 = matmul(transpose_x = var_5914_transpose_x_1, transpose_y = var_5914_transpose_y_1, x = attn_weights_327_cast_fp16, y = var_5898_cast_fp16_0)[name = string("op_5914_cast_fp16")]; bool attn_weights_329_transpose_x_0 = const()[name = string("attn_weights_329_transpose_x_0"), val = bool(false)]; bool attn_weights_329_transpose_y_0 = const()[name = string("attn_weights_329_transpose_y_0"), val = bool(false)]; tensor attn_weights_329_cast_fp16 = matmul(transpose_x = attn_weights_329_transpose_x_0, transpose_y = attn_weights_329_transpose_y_0, x = var_5888_cast_fp16_1, y = var_5901_cast_fp16_1)[name = string("attn_weights_329_cast_fp16")]; fp16 _inversed_attn_weights_331_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_331_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_331_cast_fp16 = mul(x = attn_weights_329_cast_fp16, y = _inversed_attn_weights_331_y_0_to_fp16)[name = string("_inversed_attn_weights_331_cast_fp16")]; tensor attn_weights_333_cast_fp16 = add(x = _inversed_attn_weights_331_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_333_cast_fp16")]; int32 var_5920 = const()[name = string("op_5920"), val = int32(2)]; tensor attn_weights_335_cast_fp16 = softmax(axis = var_5920, x = attn_weights_333_cast_fp16)[name = string("attn_weights_335_cast_fp16")]; bool attn_output_121_transpose_x_1 = const()[name = string("attn_output_121_transpose_x_1"), val = bool(true)]; bool attn_output_121_transpose_y_1 = const()[name = string("attn_output_121_transpose_y_1"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_1, transpose_y = attn_output_121_transpose_y_1, x = attn_weights_335_cast_fp16, y = var_5898_cast_fp16_1)[name = string("attn_output_121_cast_fp16")]; int32 var_5928 = const()[name = string("op_5928"), val = int32(1)]; bool attn_output_123_interleave_0 = const()[name = string("attn_output_123_interleave_0"), val = bool(false)]; tensor attn_output_123_cast_fp16 = concat(axis = var_5928, interleave = attn_output_123_interleave_0, values = (var_5914_cast_fp16, attn_output_121_cast_fp16))[name = string("attn_output_123_cast_fp16")]; tensor var_5932_perm_0 = const()[name = string("op_5932_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5937 = const()[name = string("op_5937"), val = tensor([1, 1024, 1, 64])]; tensor var_5932_cast_fp16 = transpose(perm = var_5932_perm_0, x = attn_output_123_cast_fp16)[name = string("transpose_9")]; tensor x_365_cast_fp16 = reshape(shape = var_5937, x = var_5932_cast_fp16)[name = string("x_365_cast_fp16")]; string hidden_states_123_pad_type_0 = const()[name = string("hidden_states_123_pad_type_0"), val = string("valid")]; tensor hidden_states_123_strides_0 = const()[name = string("hidden_states_123_strides_0"), val = tensor([1, 1])]; tensor hidden_states_123_pad_0 = const()[name = string("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_123_dilations_0 = const()[name = string("hidden_states_123_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_123_groups_0 = const()[name = string("hidden_states_123_groups_0"), val = int32(1)]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608890688)))]; tensor hidden_states_123_cast_fp16 = conv(dilations = hidden_states_123_dilations_0, groups = hidden_states_123_groups_0, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = hidden_states_123_strides_0, weight = var_5944_to_fp16, x = x_365_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_359_cast_fp16, y = hidden_states_123_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(1)]; fp16 const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5959_cast_fp16 = mul(x = x_367_cast_fp16, y = const_213_promoted_to_fp16)[name = string("op_5959_cast_fp16")]; bool x_369_interleave_0 = const()[name = string("x_369_interleave_0"), val = bool(false)]; tensor x_369_cast_fp16 = concat(axis = var_5956, interleave = x_369_interleave_0, values = (x_367_cast_fp16, var_5959_cast_fp16))[name = string("x_369_cast_fp16")]; tensor out_247_axes_0 = const()[name = string("out_247_axes_0"), val = tensor([1])]; fp16 var_5969_to_fp16 = const()[name = string("op_5969_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_247_cast_fp16 = layer_norm(axes = out_247_axes_0, epsilon = var_5969_to_fp16, x = x_369_cast_fp16)[name = string("out_247_cast_fp16")]; tensor layer_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610987904)))]; tensor out_249_cast_fp16 = mul(x = out_247_cast_fp16, y = layer_layers_20_post_attention_layernorm_weight_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5975_split_sizes_0 = const()[name = string("op_5975_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5975_axis_0 = const()[name = string("op_5975_axis_0"), val = int32(1)]; tensor var_5975_cast_fp16_0, tensor var_5975_cast_fp16_1 = split(axis = var_5975_axis_0, split_sizes = var_5975_split_sizes_0, x = out_249_cast_fp16)[name = string("op_5975_cast_fp16")]; string input_41_pad_type_0 = const()[name = string("input_41_pad_type_0"), val = string("valid")]; tensor input_41_strides_0 = const()[name = string("input_41_strides_0"), val = tensor([1, 1])]; tensor input_41_pad_0 = const()[name = string("input_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_41_dilations_0 = const()[name = string("input_41_dilations_0"), val = tensor([1, 1])]; int32 input_41_groups_0 = const()[name = string("input_41_groups_0"), val = int32(1)]; tensor var_5980_to_fp16 = const()[name = string("op_5980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610992064)))]; tensor input_41_cast_fp16 = conv(dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = var_5980_to_fp16, x = var_5975_cast_fp16_0)[name = string("input_41_cast_fp16")]; tensor var_5991_cast_fp16 = silu(x = input_41_cast_fp16)[name = string("op_5991_cast_fp16")]; string var_5996_pad_type_0 = const()[name = string("op_5996_pad_type_0"), val = string("valid")]; tensor var_5996_strides_0 = const()[name = string("op_5996_strides_0"), val = tensor([1, 1])]; tensor var_5996_pad_0 = const()[name = string("op_5996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5996_dilations_0 = const()[name = string("op_5996_dilations_0"), val = tensor([1, 1])]; int32 var_5996_groups_0 = const()[name = string("op_5996_groups_0"), val = int32(1)]; tensor var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619380736)))]; tensor var_5996_cast_fp16 = conv(dilations = var_5996_dilations_0, groups = var_5996_groups_0, pad = var_5996_pad_0, pad_type = var_5996_pad_type_0, strides = var_5996_strides_0, weight = var_5979_to_fp16, x = var_5975_cast_fp16_0)[name = string("op_5996_cast_fp16")]; tensor x_375_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5996_cast_fp16)[name = string("x_375_cast_fp16")]; string hidden_states_125_pad_type_0 = const()[name = string("hidden_states_125_pad_type_0"), val = string("valid")]; tensor hidden_states_125_strides_0 = const()[name = string("hidden_states_125_strides_0"), val = tensor([1, 1])]; tensor hidden_states_125_pad_0 = const()[name = string("hidden_states_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_125_dilations_0 = const()[name = string("hidden_states_125_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_125_groups_0 = const()[name = string("hidden_states_125_groups_0"), val = int32(1)]; tensor var_5978_to_fp16 = const()[name = string("op_5978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627769408)))]; tensor hidden_states_125_cast_fp16 = conv(dilations = hidden_states_125_dilations_0, groups = hidden_states_125_groups_0, pad = hidden_states_125_pad_0, pad_type = hidden_states_125_pad_type_0, strides = hidden_states_125_strides_0, weight = var_5978_to_fp16, x = x_375_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = hidden_states_125_cast_fp16)[name = string("x_377_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6012_cast_fp16 = mul(x = x_377_cast_fp16, y = const_214_promoted_to_fp16)[name = string("op_6012_cast_fp16")]; bool x_379_interleave_0 = const()[name = string("x_379_interleave_0"), val = bool(false)]; tensor x_379_cast_fp16 = concat(axis = var_6009, interleave = x_379_interleave_0, values = (x_377_cast_fp16, var_6012_cast_fp16))[name = string("x_379_cast_fp16")]; tensor out_253_axes_0 = const()[name = string("out_253_axes_0"), val = tensor([1])]; fp16 var_6022_to_fp16 = const()[name = string("op_6022_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_253_cast_fp16 = layer_norm(axes = out_253_axes_0, epsilon = var_6022_to_fp16, x = x_379_cast_fp16)[name = string("out_253_cast_fp16")]; tensor layer_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636158080)))]; tensor out_255_cast_fp16 = mul(x = out_253_cast_fp16, y = layer_layers_21_input_layernorm_weight_to_fp16)[name = string("out_255_cast_fp16")]; tensor var_6028_split_sizes_0 = const()[name = string("op_6028_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6028_axis_0 = const()[name = string("op_6028_axis_0"), val = int32(1)]; tensor var_6028_cast_fp16_0, tensor var_6028_cast_fp16_1 = split(axis = var_6028_axis_0, split_sizes = var_6028_split_sizes_0, x = out_255_cast_fp16)[name = string("op_6028_cast_fp16")]; string query_states_85_pad_type_0 = const()[name = string("query_states_85_pad_type_0"), val = string("valid")]; tensor query_states_85_strides_0 = const()[name = string("query_states_85_strides_0"), val = tensor([1, 1])]; tensor query_states_85_pad_0 = const()[name = string("query_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_85_dilations_0 = const()[name = string("query_states_85_dilations_0"), val = tensor([1, 1])]; int32 query_states_85_groups_0 = const()[name = string("query_states_85_groups_0"), val = int32(1)]; tensor var_6050_to_fp16 = const()[name = string("op_6050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636162240)))]; tensor query_states_85_cast_fp16 = conv(dilations = query_states_85_dilations_0, groups = query_states_85_groups_0, pad = query_states_85_pad_0, pad_type = query_states_85_pad_type_0, strides = query_states_85_strides_0, weight = var_6050_to_fp16, x = var_6028_cast_fp16_0)[name = string("query_states_85_cast_fp16")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor var_6061_to_fp16 = const()[name = string("op_6061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638259456)))]; tensor key_states_85_cast_fp16 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = var_6061_to_fp16, x = var_6028_cast_fp16_0)[name = string("key_states_85_cast_fp16")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor var_6072_to_fp16 = const()[name = string("op_6072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638521664)))]; tensor value_states_85_cast_fp16 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = var_6072_to_fp16, x = var_6028_cast_fp16_0)[name = string("value_states_85_cast_fp16")]; tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 16, 64, 64])]; tensor embed_85_cast_fp16 = reshape(shape = var_6080, x = query_states_85_cast_fp16)[name = string("embed_85_cast_fp16")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([1, 2, 64, 64])]; tensor var_6085_cast_fp16 = reshape(shape = var_6084, x = key_states_85_cast_fp16)[name = string("op_6085_cast_fp16")]; tensor embed_87_perm_0 = const()[name = string("embed_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6091 = const()[name = string("op_6091"), val = tensor([1, 2, 64, 64])]; tensor var_6092_cast_fp16 = reshape(shape = var_6091, x = value_states_85_cast_fp16)[name = string("op_6092_cast_fp16")]; tensor value_states_87_perm_0 = const()[name = string("value_states_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6096_cast_fp16 = mul(x = embed_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6097_split_sizes_0 = const()[name = string("op_6097_split_sizes_0"), val = tensor([32, 32])]; int32 var_6097_axis_0 = const()[name = string("op_6097_axis_0"), val = int32(-2)]; tensor var_6097_cast_fp16_0, tensor var_6097_cast_fp16_1 = split(axis = var_6097_axis_0, split_sizes = var_6097_split_sizes_0, x = embed_85_cast_fp16)[name = string("op_6097_cast_fp16")]; fp16 const_217_promoted_to_fp16 = const()[name = string("const_217_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6099_cast_fp16 = mul(x = var_6097_cast_fp16_1, y = const_217_promoted_to_fp16)[name = string("op_6099_cast_fp16")]; int32 var_6101 = const()[name = string("op_6101"), val = int32(-2)]; bool var_6102_interleave_0 = const()[name = string("op_6102_interleave_0"), val = bool(false)]; tensor var_6102_cast_fp16 = concat(axis = var_6101, interleave = var_6102_interleave_0, values = (var_6099_cast_fp16, var_6097_cast_fp16_0))[name = string("op_6102_cast_fp16")]; tensor var_6103_cast_fp16 = mul(x = var_6102_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6103_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_6096_cast_fp16, y = var_6103_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor embed_87_cast_fp16 = transpose(perm = embed_87_perm_0, x = var_6085_cast_fp16)[name = string("transpose_8")]; tensor var_6106_cast_fp16 = mul(x = embed_87_cast_fp16, y = cos_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6107_split_sizes_0 = const()[name = string("op_6107_split_sizes_0"), val = tensor([32, 32])]; int32 var_6107_axis_0 = const()[name = string("op_6107_axis_0"), val = int32(-1)]; tensor var_6107_cast_fp16_0, tensor var_6107_cast_fp16_1 = split(axis = var_6107_axis_0, split_sizes = var_6107_split_sizes_0, x = embed_87_cast_fp16)[name = string("op_6107_cast_fp16")]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6109_cast_fp16 = mul(x = var_6107_cast_fp16_1, y = const_218_promoted_to_fp16)[name = string("op_6109_cast_fp16")]; int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; bool var_6112_interleave_0 = const()[name = string("op_6112_interleave_0"), val = bool(false)]; tensor var_6112_cast_fp16 = concat(axis = var_6111, interleave = var_6112_interleave_0, values = (var_6109_cast_fp16, var_6107_cast_fp16_0))[name = string("op_6112_cast_fp16")]; tensor var_6113_cast_fp16 = mul(x = var_6112_cast_fp16, y = sin_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor key_states_87_cast_fp16 = add(x = var_6106_cast_fp16, y = var_6113_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([21])]; tensor expand_dims_212 = const()[name = string("expand_dims_212"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([22])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_211, expand_dims_212, position_id, concat_171_values3_0))[name = string("concat_171")]; tensor concat_172_values1_0 = const()[name = string("concat_172_values1_0"), val = tensor([0])]; tensor concat_172_values3_0 = const()[name = string("concat_172_values3_0"), val = tensor([0])]; int32 concat_172_axis_0 = const()[name = string("concat_172_axis_0"), val = int32(0)]; bool concat_172_interleave_0 = const()[name = string("concat_172_interleave_0"), val = bool(false)]; tensor concat_172 = concat(axis = concat_172_axis_0, interleave = concat_172_interleave_0, values = (expand_dims_214, concat_172_values1_0, var_426, concat_172_values3_0))[name = string("concat_172")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = key_states_87_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_400_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_400")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87_cast_fp16 = transpose(perm = value_states_87_perm_0, x = var_6092_cast_fp16)[name = string("transpose_7")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = value_states_87_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_401_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_401")]; tensor var_6156_begin_0 = const()[name = string("op_6156_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6156_end_0 = const()[name = string("op_6156_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6156_end_mask_0 = const()[name = string("op_6156_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6156_cast_fp16 = slice_by_index(begin = var_6156_begin_0, end = var_6156_end_0, end_mask = var_6156_end_mask_0, x = coreml_update_state_90)[name = string("op_6156_cast_fp16")]; tensor tile_42 = const()[name = string("tile_42"), val = tensor([1, 1])]; int32 var_6159_axis_0 = const()[name = string("op_6159_axis_0"), val = int32(1)]; tensor var_6159_cast_fp16_0, tensor var_6159_cast_fp16_1 = split(axis = var_6159_axis_0, split_sizes = tile_42, x = var_6156_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor var_6166_begin_0 = const()[name = string("op_6166_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6166_end_0 = const()[name = string("op_6166_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6166_end_mask_0 = const()[name = string("op_6166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6166_cast_fp16 = slice_by_index(begin = var_6166_begin_0, end = var_6166_end_0, end_mask = var_6166_end_mask_0, x = coreml_update_state_91)[name = string("op_6166_cast_fp16")]; tensor tile_43 = const()[name = string("tile_43"), val = tensor([1, 1])]; int32 var_6169_axis_0 = const()[name = string("op_6169_axis_0"), val = int32(1)]; tensor var_6169_cast_fp16_0, tensor var_6169_cast_fp16_1 = split(axis = var_6169_axis_0, split_sizes = tile_43, x = var_6166_cast_fp16)[name = string("op_6169_cast_fp16")]; tensor var_6172_split_sizes_0 = const()[name = string("op_6172_split_sizes_0"), val = tensor([8, 8])]; int32 var_6172_axis_0 = const()[name = string("op_6172_axis_0"), val = int32(1)]; tensor var_6172_cast_fp16_0, tensor var_6172_cast_fp16_1 = split(axis = var_6172_axis_0, split_sizes = var_6172_split_sizes_0, x = query_states_87_cast_fp16)[name = string("op_6172_cast_fp16")]; bool attn_weights_337_transpose_x_0 = const()[name = string("attn_weights_337_transpose_x_0"), val = bool(false)]; bool attn_weights_337_transpose_y_0 = const()[name = string("attn_weights_337_transpose_y_0"), val = bool(false)]; tensor attn_weights_337_cast_fp16 = matmul(transpose_x = attn_weights_337_transpose_x_0, transpose_y = attn_weights_337_transpose_y_0, x = var_6159_cast_fp16_0, y = var_6172_cast_fp16_0)[name = string("attn_weights_337_cast_fp16")]; fp16 _inversed_attn_weights_339_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_339_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_339_cast_fp16 = mul(x = attn_weights_337_cast_fp16, y = _inversed_attn_weights_339_y_0_to_fp16)[name = string("_inversed_attn_weights_339_cast_fp16")]; tensor attn_weights_341_cast_fp16 = add(x = _inversed_attn_weights_339_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_341_cast_fp16")]; int32 var_6179 = const()[name = string("op_6179"), val = int32(2)]; tensor attn_weights_343_cast_fp16 = softmax(axis = var_6179, x = attn_weights_341_cast_fp16)[name = string("attn_weights_343_cast_fp16")]; bool var_6185_transpose_x_1 = const()[name = string("op_6185_transpose_x_1"), val = bool(true)]; bool var_6185_transpose_y_1 = const()[name = string("op_6185_transpose_y_1"), val = bool(false)]; tensor var_6185_cast_fp16 = matmul(transpose_x = var_6185_transpose_x_1, transpose_y = var_6185_transpose_y_1, x = attn_weights_343_cast_fp16, y = var_6169_cast_fp16_0)[name = string("op_6185_cast_fp16")]; bool attn_weights_345_transpose_x_0 = const()[name = string("attn_weights_345_transpose_x_0"), val = bool(false)]; bool attn_weights_345_transpose_y_0 = const()[name = string("attn_weights_345_transpose_y_0"), val = bool(false)]; tensor attn_weights_345_cast_fp16 = matmul(transpose_x = attn_weights_345_transpose_x_0, transpose_y = attn_weights_345_transpose_y_0, x = var_6159_cast_fp16_1, y = var_6172_cast_fp16_1)[name = string("attn_weights_345_cast_fp16")]; fp16 _inversed_attn_weights_347_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_347_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_347_cast_fp16 = mul(x = attn_weights_345_cast_fp16, y = _inversed_attn_weights_347_y_0_to_fp16)[name = string("_inversed_attn_weights_347_cast_fp16")]; tensor attn_weights_349_cast_fp16 = add(x = _inversed_attn_weights_347_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_349_cast_fp16")]; int32 var_6191 = const()[name = string("op_6191"), val = int32(2)]; tensor attn_weights_351_cast_fp16 = softmax(axis = var_6191, x = attn_weights_349_cast_fp16)[name = string("attn_weights_351_cast_fp16")]; bool attn_output_127_transpose_x_1 = const()[name = string("attn_output_127_transpose_x_1"), val = bool(true)]; bool attn_output_127_transpose_y_1 = const()[name = string("attn_output_127_transpose_y_1"), val = bool(false)]; tensor attn_output_127_cast_fp16 = matmul(transpose_x = attn_output_127_transpose_x_1, transpose_y = attn_output_127_transpose_y_1, x = attn_weights_351_cast_fp16, y = var_6169_cast_fp16_1)[name = string("attn_output_127_cast_fp16")]; int32 var_6199 = const()[name = string("op_6199"), val = int32(1)]; bool attn_output_129_interleave_0 = const()[name = string("attn_output_129_interleave_0"), val = bool(false)]; tensor attn_output_129_cast_fp16 = concat(axis = var_6199, interleave = attn_output_129_interleave_0, values = (var_6185_cast_fp16, attn_output_127_cast_fp16))[name = string("attn_output_129_cast_fp16")]; tensor var_6203_perm_0 = const()[name = string("op_6203_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 1024, 1, 64])]; tensor var_6203_cast_fp16 = transpose(perm = var_6203_perm_0, x = attn_output_129_cast_fp16)[name = string("transpose_6")]; tensor x_383_cast_fp16 = reshape(shape = var_6208, x = var_6203_cast_fp16)[name = string("x_383_cast_fp16")]; string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638783872)))]; tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = var_6215_to_fp16, x = x_383_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("x_385_cast_fp16")]; int32 var_6227 = const()[name = string("op_6227"), val = int32(1)]; fp16 const_223_promoted_to_fp16 = const()[name = string("const_223_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6230_cast_fp16 = mul(x = x_385_cast_fp16, y = const_223_promoted_to_fp16)[name = string("op_6230_cast_fp16")]; bool x_387_interleave_0 = const()[name = string("x_387_interleave_0"), val = bool(false)]; tensor x_387_cast_fp16 = concat(axis = var_6227, interleave = x_387_interleave_0, values = (x_385_cast_fp16, var_6230_cast_fp16))[name = string("x_387_cast_fp16")]; tensor out_259_axes_0 = const()[name = string("out_259_axes_0"), val = tensor([1])]; fp16 var_6240_to_fp16 = const()[name = string("op_6240_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_259_cast_fp16 = layer_norm(axes = out_259_axes_0, epsilon = var_6240_to_fp16, x = x_387_cast_fp16)[name = string("out_259_cast_fp16")]; tensor layer_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640881088)))]; tensor out_261_cast_fp16 = mul(x = out_259_cast_fp16, y = layer_layers_21_post_attention_layernorm_weight_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_6246_split_sizes_0 = const()[name = string("op_6246_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6246_axis_0 = const()[name = string("op_6246_axis_0"), val = int32(1)]; tensor var_6246_cast_fp16_0, tensor var_6246_cast_fp16_1 = split(axis = var_6246_axis_0, split_sizes = var_6246_split_sizes_0, x = out_261_cast_fp16)[name = string("op_6246_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640885248)))]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = var_6251_to_fp16, x = var_6246_cast_fp16_0)[name = string("input_43_cast_fp16")]; tensor var_6262_cast_fp16 = silu(x = input_43_cast_fp16)[name = string("op_6262_cast_fp16")]; string var_6267_pad_type_0 = const()[name = string("op_6267_pad_type_0"), val = string("valid")]; tensor var_6267_strides_0 = const()[name = string("op_6267_strides_0"), val = tensor([1, 1])]; tensor var_6267_pad_0 = const()[name = string("op_6267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6267_dilations_0 = const()[name = string("op_6267_dilations_0"), val = tensor([1, 1])]; int32 var_6267_groups_0 = const()[name = string("op_6267_groups_0"), val = int32(1)]; tensor var_6250_to_fp16 = const()[name = string("op_6250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649273920)))]; tensor var_6267_cast_fp16 = conv(dilations = var_6267_dilations_0, groups = var_6267_groups_0, pad = var_6267_pad_0, pad_type = var_6267_pad_type_0, strides = var_6267_strides_0, weight = var_6250_to_fp16, x = var_6246_cast_fp16_0)[name = string("op_6267_cast_fp16")]; tensor x_393_cast_fp16 = mul(x = var_6262_cast_fp16, y = var_6267_cast_fp16)[name = string("x_393_cast_fp16")]; string hidden_states_131_pad_type_0 = const()[name = string("hidden_states_131_pad_type_0"), val = string("valid")]; tensor hidden_states_131_strides_0 = const()[name = string("hidden_states_131_strides_0"), val = tensor([1, 1])]; tensor hidden_states_131_pad_0 = const()[name = string("hidden_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_131_dilations_0 = const()[name = string("hidden_states_131_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_131_groups_0 = const()[name = string("hidden_states_131_groups_0"), val = int32(1)]; tensor var_6249_to_fp16 = const()[name = string("op_6249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657662592)))]; tensor hidden_states_131_cast_fp16 = conv(dilations = hidden_states_131_dilations_0, groups = hidden_states_131_groups_0, pad = hidden_states_131_pad_0, pad_type = hidden_states_131_pad_type_0, strides = hidden_states_131_strides_0, weight = var_6249_to_fp16, x = x_393_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor x_395_cast_fp16 = add(x = x_385_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("x_395_cast_fp16")]; int32 var_6280 = const()[name = string("op_6280"), val = int32(1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6283_cast_fp16 = mul(x = x_395_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_6283_cast_fp16")]; bool x_397_interleave_0 = const()[name = string("x_397_interleave_0"), val = bool(false)]; tensor x_397_cast_fp16 = concat(axis = var_6280, interleave = x_397_interleave_0, values = (x_395_cast_fp16, var_6283_cast_fp16))[name = string("x_397_cast_fp16")]; tensor out_265_axes_0 = const()[name = string("out_265_axes_0"), val = tensor([1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_265_cast_fp16 = layer_norm(axes = out_265_axes_0, epsilon = var_6293_to_fp16, x = x_397_cast_fp16)[name = string("out_265_cast_fp16")]; tensor layer_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666051264)))]; tensor out_267_cast_fp16 = mul(x = out_265_cast_fp16, y = layer_layers_22_input_layernorm_weight_to_fp16)[name = string("out_267_cast_fp16")]; tensor var_6299_split_sizes_0 = const()[name = string("op_6299_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6299_axis_0 = const()[name = string("op_6299_axis_0"), val = int32(1)]; tensor var_6299_cast_fp16_0, tensor var_6299_cast_fp16_1 = split(axis = var_6299_axis_0, split_sizes = var_6299_split_sizes_0, x = out_267_cast_fp16)[name = string("op_6299_cast_fp16")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor var_6321_to_fp16 = const()[name = string("op_6321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666055424)))]; tensor query_states_89_cast_fp16 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = var_6321_to_fp16, x = var_6299_cast_fp16_0)[name = string("query_states_89_cast_fp16")]; string key_states_89_pad_type_0 = const()[name = string("key_states_89_pad_type_0"), val = string("valid")]; tensor key_states_89_strides_0 = const()[name = string("key_states_89_strides_0"), val = tensor([1, 1])]; tensor key_states_89_pad_0 = const()[name = string("key_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_89_dilations_0 = const()[name = string("key_states_89_dilations_0"), val = tensor([1, 1])]; int32 key_states_89_groups_0 = const()[name = string("key_states_89_groups_0"), val = int32(1)]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668152640)))]; tensor key_states_89_cast_fp16 = conv(dilations = key_states_89_dilations_0, groups = key_states_89_groups_0, pad = key_states_89_pad_0, pad_type = key_states_89_pad_type_0, strides = key_states_89_strides_0, weight = var_6332_to_fp16, x = var_6299_cast_fp16_0)[name = string("key_states_89_cast_fp16")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668414848)))]; tensor value_states_89_cast_fp16 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = var_6343_to_fp16, x = var_6299_cast_fp16_0)[name = string("value_states_89_cast_fp16")]; tensor var_6351 = const()[name = string("op_6351"), val = tensor([1, 16, 64, 64])]; tensor embed_89_cast_fp16 = reshape(shape = var_6351, x = query_states_89_cast_fp16)[name = string("embed_89_cast_fp16")]; tensor var_6355 = const()[name = string("op_6355"), val = tensor([1, 2, 64, 64])]; tensor var_6356_cast_fp16 = reshape(shape = var_6355, x = key_states_89_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor embed_91_perm_0 = const()[name = string("embed_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6362 = const()[name = string("op_6362"), val = tensor([1, 2, 64, 64])]; tensor var_6363_cast_fp16 = reshape(shape = var_6362, x = value_states_89_cast_fp16)[name = string("op_6363_cast_fp16")]; tensor value_states_91_perm_0 = const()[name = string("value_states_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6367_cast_fp16 = mul(x = embed_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor var_6368_split_sizes_0 = const()[name = string("op_6368_split_sizes_0"), val = tensor([32, 32])]; int32 var_6368_axis_0 = const()[name = string("op_6368_axis_0"), val = int32(-2)]; tensor var_6368_cast_fp16_0, tensor var_6368_cast_fp16_1 = split(axis = var_6368_axis_0, split_sizes = var_6368_split_sizes_0, x = embed_89_cast_fp16)[name = string("op_6368_cast_fp16")]; fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6370_cast_fp16 = mul(x = var_6368_cast_fp16_1, y = const_227_promoted_to_fp16)[name = string("op_6370_cast_fp16")]; int32 var_6372 = const()[name = string("op_6372"), val = int32(-2)]; bool var_6373_interleave_0 = const()[name = string("op_6373_interleave_0"), val = bool(false)]; tensor var_6373_cast_fp16 = concat(axis = var_6372, interleave = var_6373_interleave_0, values = (var_6370_cast_fp16, var_6368_cast_fp16_0))[name = string("op_6373_cast_fp16")]; tensor var_6374_cast_fp16 = mul(x = var_6373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6374_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_6367_cast_fp16, y = var_6374_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor embed_91_cast_fp16 = transpose(perm = embed_91_perm_0, x = var_6356_cast_fp16)[name = string("transpose_5")]; tensor var_6377_cast_fp16 = mul(x = embed_91_cast_fp16, y = cos_cast_fp16)[name = string("op_6377_cast_fp16")]; tensor var_6378_split_sizes_0 = const()[name = string("op_6378_split_sizes_0"), val = tensor([32, 32])]; int32 var_6378_axis_0 = const()[name = string("op_6378_axis_0"), val = int32(-1)]; tensor var_6378_cast_fp16_0, tensor var_6378_cast_fp16_1 = split(axis = var_6378_axis_0, split_sizes = var_6378_split_sizes_0, x = embed_91_cast_fp16)[name = string("op_6378_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6380_cast_fp16 = mul(x = var_6378_cast_fp16_1, y = const_228_promoted_to_fp16)[name = string("op_6380_cast_fp16")]; int32 var_6382 = const()[name = string("op_6382"), val = int32(-1)]; bool var_6383_interleave_0 = const()[name = string("op_6383_interleave_0"), val = bool(false)]; tensor var_6383_cast_fp16 = concat(axis = var_6382, interleave = var_6383_interleave_0, values = (var_6380_cast_fp16, var_6378_cast_fp16_0))[name = string("op_6383_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6383_cast_fp16, y = sin_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor key_states_91_cast_fp16 = add(x = var_6377_cast_fp16, y = var_6384_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor expand_dims_221 = const()[name = string("expand_dims_221"), val = tensor([22])]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([0])]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([23])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_221, expand_dims_222, position_id, concat_179_values3_0))[name = string("concat_179")]; tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (expand_dims_224, concat_180_values1_0, var_426, concat_180_values3_0))[name = string("concat_180")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = key_states_91_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_402_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_402")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91_cast_fp16 = transpose(perm = value_states_91_perm_0, x = var_6363_cast_fp16)[name = string("transpose_4")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = value_states_91_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_403_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_403")]; tensor var_6427_begin_0 = const()[name = string("op_6427_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6427_end_0 = const()[name = string("op_6427_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6427_end_mask_0 = const()[name = string("op_6427_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6427_cast_fp16 = slice_by_index(begin = var_6427_begin_0, end = var_6427_end_0, end_mask = var_6427_end_mask_0, x = coreml_update_state_92)[name = string("op_6427_cast_fp16")]; tensor tile_44 = const()[name = string("tile_44"), val = tensor([1, 1])]; int32 var_6430_axis_0 = const()[name = string("op_6430_axis_0"), val = int32(1)]; tensor var_6430_cast_fp16_0, tensor var_6430_cast_fp16_1 = split(axis = var_6430_axis_0, split_sizes = tile_44, x = var_6427_cast_fp16)[name = string("op_6430_cast_fp16")]; tensor var_6437_begin_0 = const()[name = string("op_6437_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6437_end_0 = const()[name = string("op_6437_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6437_end_mask_0 = const()[name = string("op_6437_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = coreml_update_state_93)[name = string("op_6437_cast_fp16")]; tensor tile_45 = const()[name = string("tile_45"), val = tensor([1, 1])]; int32 var_6440_axis_0 = const()[name = string("op_6440_axis_0"), val = int32(1)]; tensor var_6440_cast_fp16_0, tensor var_6440_cast_fp16_1 = split(axis = var_6440_axis_0, split_sizes = tile_45, x = var_6437_cast_fp16)[name = string("op_6440_cast_fp16")]; tensor var_6443_split_sizes_0 = const()[name = string("op_6443_split_sizes_0"), val = tensor([8, 8])]; int32 var_6443_axis_0 = const()[name = string("op_6443_axis_0"), val = int32(1)]; tensor var_6443_cast_fp16_0, tensor var_6443_cast_fp16_1 = split(axis = var_6443_axis_0, split_sizes = var_6443_split_sizes_0, x = query_states_91_cast_fp16)[name = string("op_6443_cast_fp16")]; bool attn_weights_353_transpose_x_0 = const()[name = string("attn_weights_353_transpose_x_0"), val = bool(false)]; bool attn_weights_353_transpose_y_0 = const()[name = string("attn_weights_353_transpose_y_0"), val = bool(false)]; tensor attn_weights_353_cast_fp16 = matmul(transpose_x = attn_weights_353_transpose_x_0, transpose_y = attn_weights_353_transpose_y_0, x = var_6430_cast_fp16_0, y = var_6443_cast_fp16_0)[name = string("attn_weights_353_cast_fp16")]; fp16 _inversed_attn_weights_355_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_355_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_355_cast_fp16 = mul(x = attn_weights_353_cast_fp16, y = _inversed_attn_weights_355_y_0_to_fp16)[name = string("_inversed_attn_weights_355_cast_fp16")]; tensor attn_weights_357_cast_fp16 = add(x = _inversed_attn_weights_355_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_357_cast_fp16")]; int32 var_6450 = const()[name = string("op_6450"), val = int32(2)]; tensor attn_weights_359_cast_fp16 = softmax(axis = var_6450, x = attn_weights_357_cast_fp16)[name = string("attn_weights_359_cast_fp16")]; bool var_6456_transpose_x_1 = const()[name = string("op_6456_transpose_x_1"), val = bool(true)]; bool var_6456_transpose_y_1 = const()[name = string("op_6456_transpose_y_1"), val = bool(false)]; tensor var_6456_cast_fp16 = matmul(transpose_x = var_6456_transpose_x_1, transpose_y = var_6456_transpose_y_1, x = attn_weights_359_cast_fp16, y = var_6440_cast_fp16_0)[name = string("op_6456_cast_fp16")]; bool attn_weights_361_transpose_x_0 = const()[name = string("attn_weights_361_transpose_x_0"), val = bool(false)]; bool attn_weights_361_transpose_y_0 = const()[name = string("attn_weights_361_transpose_y_0"), val = bool(false)]; tensor attn_weights_361_cast_fp16 = matmul(transpose_x = attn_weights_361_transpose_x_0, transpose_y = attn_weights_361_transpose_y_0, x = var_6430_cast_fp16_1, y = var_6443_cast_fp16_1)[name = string("attn_weights_361_cast_fp16")]; fp16 _inversed_attn_weights_363_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_363_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_363_cast_fp16 = mul(x = attn_weights_361_cast_fp16, y = _inversed_attn_weights_363_y_0_to_fp16)[name = string("_inversed_attn_weights_363_cast_fp16")]; tensor attn_weights_365_cast_fp16 = add(x = _inversed_attn_weights_363_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_365_cast_fp16")]; int32 var_6462 = const()[name = string("op_6462"), val = int32(2)]; tensor attn_weights_367_cast_fp16 = softmax(axis = var_6462, x = attn_weights_365_cast_fp16)[name = string("attn_weights_367_cast_fp16")]; bool attn_output_133_transpose_x_1 = const()[name = string("attn_output_133_transpose_x_1"), val = bool(true)]; bool attn_output_133_transpose_y_1 = const()[name = string("attn_output_133_transpose_y_1"), val = bool(false)]; tensor attn_output_133_cast_fp16 = matmul(transpose_x = attn_output_133_transpose_x_1, transpose_y = attn_output_133_transpose_y_1, x = attn_weights_367_cast_fp16, y = var_6440_cast_fp16_1)[name = string("attn_output_133_cast_fp16")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(1)]; bool attn_output_135_interleave_0 = const()[name = string("attn_output_135_interleave_0"), val = bool(false)]; tensor attn_output_135_cast_fp16 = concat(axis = var_6470, interleave = attn_output_135_interleave_0, values = (var_6456_cast_fp16, attn_output_133_cast_fp16))[name = string("attn_output_135_cast_fp16")]; tensor var_6474_perm_0 = const()[name = string("op_6474_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6479 = const()[name = string("op_6479"), val = tensor([1, 1024, 1, 64])]; tensor var_6474_cast_fp16 = transpose(perm = var_6474_perm_0, x = attn_output_135_cast_fp16)[name = string("transpose_3")]; tensor x_401_cast_fp16 = reshape(shape = var_6479, x = var_6474_cast_fp16)[name = string("x_401_cast_fp16")]; string hidden_states_135_pad_type_0 = const()[name = string("hidden_states_135_pad_type_0"), val = string("valid")]; tensor hidden_states_135_strides_0 = const()[name = string("hidden_states_135_strides_0"), val = tensor([1, 1])]; tensor hidden_states_135_pad_0 = const()[name = string("hidden_states_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_135_dilations_0 = const()[name = string("hidden_states_135_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_135_groups_0 = const()[name = string("hidden_states_135_groups_0"), val = int32(1)]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668677056)))]; tensor hidden_states_135_cast_fp16 = conv(dilations = hidden_states_135_dilations_0, groups = hidden_states_135_groups_0, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = hidden_states_135_strides_0, weight = var_6486_to_fp16, x = x_401_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor x_403_cast_fp16 = add(x = x_395_cast_fp16, y = hidden_states_135_cast_fp16)[name = string("x_403_cast_fp16")]; int32 var_6498 = const()[name = string("op_6498"), val = int32(1)]; fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6501_cast_fp16 = mul(x = x_403_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_6501_cast_fp16")]; bool x_405_interleave_0 = const()[name = string("x_405_interleave_0"), val = bool(false)]; tensor x_405_cast_fp16 = concat(axis = var_6498, interleave = x_405_interleave_0, values = (x_403_cast_fp16, var_6501_cast_fp16))[name = string("x_405_cast_fp16")]; tensor out_271_axes_0 = const()[name = string("out_271_axes_0"), val = tensor([1])]; fp16 var_6511_to_fp16 = const()[name = string("op_6511_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_271_cast_fp16 = layer_norm(axes = out_271_axes_0, epsilon = var_6511_to_fp16, x = x_405_cast_fp16)[name = string("out_271_cast_fp16")]; tensor layer_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670774272)))]; tensor out_273_cast_fp16 = mul(x = out_271_cast_fp16, y = layer_layers_22_post_attention_layernorm_weight_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_6517_split_sizes_0 = const()[name = string("op_6517_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6517_axis_0 = const()[name = string("op_6517_axis_0"), val = int32(1)]; tensor var_6517_cast_fp16_0, tensor var_6517_cast_fp16_1 = split(axis = var_6517_axis_0, split_sizes = var_6517_split_sizes_0, x = out_273_cast_fp16)[name = string("op_6517_cast_fp16")]; string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670778432)))]; tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = var_6522_to_fp16, x = var_6517_cast_fp16_0)[name = string("input_45_cast_fp16")]; tensor var_6533_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_6533_cast_fp16")]; string var_6538_pad_type_0 = const()[name = string("op_6538_pad_type_0"), val = string("valid")]; tensor var_6538_strides_0 = const()[name = string("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = string("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = string("op_6538_dilations_0"), val = tensor([1, 1])]; int32 var_6538_groups_0 = const()[name = string("op_6538_groups_0"), val = int32(1)]; tensor var_6521_to_fp16 = const()[name = string("op_6521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679167104)))]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = var_6521_to_fp16, x = var_6517_cast_fp16_0)[name = string("op_6538_cast_fp16")]; tensor x_411_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6538_cast_fp16)[name = string("x_411_cast_fp16")]; string hidden_states_137_pad_type_0 = const()[name = string("hidden_states_137_pad_type_0"), val = string("valid")]; tensor hidden_states_137_strides_0 = const()[name = string("hidden_states_137_strides_0"), val = tensor([1, 1])]; tensor hidden_states_137_pad_0 = const()[name = string("hidden_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_137_dilations_0 = const()[name = string("hidden_states_137_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_137_groups_0 = const()[name = string("hidden_states_137_groups_0"), val = int32(1)]; tensor var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687555776)))]; tensor hidden_states_137_cast_fp16 = conv(dilations = hidden_states_137_dilations_0, groups = hidden_states_137_groups_0, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = hidden_states_137_strides_0, weight = var_6520_to_fp16, x = x_411_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor x_413_cast_fp16 = add(x = x_403_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("x_413_cast_fp16")]; int32 var_6551 = const()[name = string("op_6551"), val = int32(1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6554_cast_fp16 = mul(x = x_413_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; bool x_415_interleave_0 = const()[name = string("x_415_interleave_0"), val = bool(false)]; tensor x_415_cast_fp16 = concat(axis = var_6551, interleave = x_415_interleave_0, values = (x_413_cast_fp16, var_6554_cast_fp16))[name = string("x_415_cast_fp16")]; tensor out_277_axes_0 = const()[name = string("out_277_axes_0"), val = tensor([1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_277_cast_fp16 = layer_norm(axes = out_277_axes_0, epsilon = var_6564_to_fp16, x = x_415_cast_fp16)[name = string("out_277_cast_fp16")]; tensor layer_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695944448)))]; tensor out_279_cast_fp16 = mul(x = out_277_cast_fp16, y = layer_layers_23_input_layernorm_weight_to_fp16)[name = string("out_279_cast_fp16")]; tensor var_6570_split_sizes_0 = const()[name = string("op_6570_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6570_axis_0 = const()[name = string("op_6570_axis_0"), val = int32(1)]; tensor var_6570_cast_fp16_0, tensor var_6570_cast_fp16_1 = split(axis = var_6570_axis_0, split_sizes = var_6570_split_sizes_0, x = out_279_cast_fp16)[name = string("op_6570_cast_fp16")]; string query_states_93_pad_type_0 = const()[name = string("query_states_93_pad_type_0"), val = string("valid")]; tensor query_states_93_strides_0 = const()[name = string("query_states_93_strides_0"), val = tensor([1, 1])]; tensor query_states_93_pad_0 = const()[name = string("query_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_93_dilations_0 = const()[name = string("query_states_93_dilations_0"), val = tensor([1, 1])]; int32 query_states_93_groups_0 = const()[name = string("query_states_93_groups_0"), val = int32(1)]; tensor var_6592_to_fp16 = const()[name = string("op_6592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695948608)))]; tensor query_states_93_cast_fp16 = conv(dilations = query_states_93_dilations_0, groups = query_states_93_groups_0, pad = query_states_93_pad_0, pad_type = query_states_93_pad_type_0, strides = query_states_93_strides_0, weight = var_6592_to_fp16, x = var_6570_cast_fp16_0)[name = string("query_states_93_cast_fp16")]; string key_states_93_pad_type_0 = const()[name = string("key_states_93_pad_type_0"), val = string("valid")]; tensor key_states_93_strides_0 = const()[name = string("key_states_93_strides_0"), val = tensor([1, 1])]; tensor key_states_93_pad_0 = const()[name = string("key_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_93_dilations_0 = const()[name = string("key_states_93_dilations_0"), val = tensor([1, 1])]; int32 key_states_93_groups_0 = const()[name = string("key_states_93_groups_0"), val = int32(1)]; tensor var_6603_to_fp16 = const()[name = string("op_6603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698045824)))]; tensor key_states_93_cast_fp16 = conv(dilations = key_states_93_dilations_0, groups = key_states_93_groups_0, pad = key_states_93_pad_0, pad_type = key_states_93_pad_type_0, strides = key_states_93_strides_0, weight = var_6603_to_fp16, x = var_6570_cast_fp16_0)[name = string("key_states_93_cast_fp16")]; string value_states_93_pad_type_0 = const()[name = string("value_states_93_pad_type_0"), val = string("valid")]; tensor value_states_93_strides_0 = const()[name = string("value_states_93_strides_0"), val = tensor([1, 1])]; tensor value_states_93_pad_0 = const()[name = string("value_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_93_dilations_0 = const()[name = string("value_states_93_dilations_0"), val = tensor([1, 1])]; int32 value_states_93_groups_0 = const()[name = string("value_states_93_groups_0"), val = int32(1)]; tensor var_6614_to_fp16 = const()[name = string("op_6614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698308032)))]; tensor value_states_93_cast_fp16 = conv(dilations = value_states_93_dilations_0, groups = value_states_93_groups_0, pad = value_states_93_pad_0, pad_type = value_states_93_pad_type_0, strides = value_states_93_strides_0, weight = var_6614_to_fp16, x = var_6570_cast_fp16_0)[name = string("value_states_93_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([1, 16, 64, 64])]; tensor embed_93_cast_fp16 = reshape(shape = var_6622, x = query_states_93_cast_fp16)[name = string("embed_93_cast_fp16")]; tensor var_6626 = const()[name = string("op_6626"), val = tensor([1, 2, 64, 64])]; tensor var_6627_cast_fp16 = reshape(shape = var_6626, x = key_states_93_cast_fp16)[name = string("op_6627_cast_fp16")]; tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([1, 2, 64, 64])]; tensor var_6634_cast_fp16 = reshape(shape = var_6633, x = value_states_93_cast_fp16)[name = string("op_6634_cast_fp16")]; tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6638_cast_fp16 = mul(x = embed_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6638_cast_fp16")]; tensor var_6639_split_sizes_0 = const()[name = string("op_6639_split_sizes_0"), val = tensor([32, 32])]; int32 var_6639_axis_0 = const()[name = string("op_6639_axis_0"), val = int32(-2)]; tensor var_6639_cast_fp16_0, tensor var_6639_cast_fp16_1 = split(axis = var_6639_axis_0, split_sizes = var_6639_split_sizes_0, x = embed_93_cast_fp16)[name = string("op_6639_cast_fp16")]; fp16 const_237_promoted_to_fp16 = const()[name = string("const_237_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6641_cast_fp16 = mul(x = var_6639_cast_fp16_1, y = const_237_promoted_to_fp16)[name = string("op_6641_cast_fp16")]; int32 var_6643 = const()[name = string("op_6643"), val = int32(-2)]; bool var_6644_interleave_0 = const()[name = string("op_6644_interleave_0"), val = bool(false)]; tensor var_6644_cast_fp16 = concat(axis = var_6643, interleave = var_6644_interleave_0, values = (var_6641_cast_fp16, var_6639_cast_fp16_0))[name = string("op_6644_cast_fp16")]; tensor var_6645_cast_fp16 = mul(x = var_6644_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6645_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_6638_cast_fp16, y = var_6645_cast_fp16)[name = string("query_states_cast_fp16")]; tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_6627_cast_fp16)[name = string("transpose_2")]; tensor var_6648_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_6648_cast_fp16")]; tensor var_6649_split_sizes_0 = const()[name = string("op_6649_split_sizes_0"), val = tensor([32, 32])]; int32 var_6649_axis_0 = const()[name = string("op_6649_axis_0"), val = int32(-1)]; tensor var_6649_cast_fp16_0, tensor var_6649_cast_fp16_1 = split(axis = var_6649_axis_0, split_sizes = var_6649_split_sizes_0, x = embed_cast_fp16)[name = string("op_6649_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6651_cast_fp16 = mul(x = var_6649_cast_fp16_1, y = const_238_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; int32 var_6653 = const()[name = string("op_6653"), val = int32(-1)]; bool var_6654_interleave_0 = const()[name = string("op_6654_interleave_0"), val = bool(false)]; tensor var_6654_cast_fp16 = concat(axis = var_6653, interleave = var_6654_interleave_0, values = (var_6651_cast_fp16, var_6649_cast_fp16_0))[name = string("op_6654_cast_fp16")]; tensor var_6655_cast_fp16 = mul(x = var_6654_cast_fp16, y = sin_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor key_states_cast_fp16 = add(x = var_6648_cast_fp16, y = var_6655_cast_fp16)[name = string("key_states_cast_fp16")]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([23])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([0])]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([24])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_231, expand_dims_232, position_id, concat_187_values3_0))[name = string("concat_187")]; tensor concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = tensor([0])]; tensor concat_188_values3_0 = const()[name = string("concat_188_values3_0"), val = tensor([0])]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (expand_dims_234, concat_188_values1_0, var_426, concat_188_values3_0))[name = string("concat_188")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = key_states_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_404_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_404")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_6634_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = value_states_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_405_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_405")]; tensor var_6698_begin_0 = const()[name = string("op_6698_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6698_end_0 = const()[name = string("op_6698_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6698_end_mask_0 = const()[name = string("op_6698_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6698_cast_fp16 = slice_by_index(begin = var_6698_begin_0, end = var_6698_end_0, end_mask = var_6698_end_mask_0, x = coreml_update_state_94)[name = string("op_6698_cast_fp16")]; tensor tile_46 = const()[name = string("tile_46"), val = tensor([1, 1])]; int32 var_6701_axis_0 = const()[name = string("op_6701_axis_0"), val = int32(1)]; tensor var_6701_cast_fp16_0, tensor var_6701_cast_fp16_1 = split(axis = var_6701_axis_0, split_sizes = tile_46, x = var_6698_cast_fp16)[name = string("op_6701_cast_fp16")]; tensor var_6708_begin_0 = const()[name = string("op_6708_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6708_end_0 = const()[name = string("op_6708_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6708_end_mask_0 = const()[name = string("op_6708_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6708_cast_fp16 = slice_by_index(begin = var_6708_begin_0, end = var_6708_end_0, end_mask = var_6708_end_mask_0, x = coreml_update_state_95)[name = string("op_6708_cast_fp16")]; tensor tile_47 = const()[name = string("tile_47"), val = tensor([1, 1])]; int32 var_6711_axis_0 = const()[name = string("op_6711_axis_0"), val = int32(1)]; tensor var_6711_cast_fp16_0, tensor var_6711_cast_fp16_1 = split(axis = var_6711_axis_0, split_sizes = tile_47, x = var_6708_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor var_6714_split_sizes_0 = const()[name = string("op_6714_split_sizes_0"), val = tensor([8, 8])]; int32 var_6714_axis_0 = const()[name = string("op_6714_axis_0"), val = int32(1)]; tensor var_6714_cast_fp16_0, tensor var_6714_cast_fp16_1 = split(axis = var_6714_axis_0, split_sizes = var_6714_split_sizes_0, x = query_states_cast_fp16)[name = string("op_6714_cast_fp16")]; bool attn_weights_369_transpose_x_0 = const()[name = string("attn_weights_369_transpose_x_0"), val = bool(false)]; bool attn_weights_369_transpose_y_0 = const()[name = string("attn_weights_369_transpose_y_0"), val = bool(false)]; tensor attn_weights_369_cast_fp16 = matmul(transpose_x = attn_weights_369_transpose_x_0, transpose_y = attn_weights_369_transpose_y_0, x = var_6701_cast_fp16_0, y = var_6714_cast_fp16_0)[name = string("attn_weights_369_cast_fp16")]; fp16 _inversed_attn_weights_371_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_371_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_371_cast_fp16 = mul(x = attn_weights_369_cast_fp16, y = _inversed_attn_weights_371_y_0_to_fp16)[name = string("_inversed_attn_weights_371_cast_fp16")]; tensor attn_weights_373_cast_fp16 = add(x = _inversed_attn_weights_371_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_373_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(2)]; tensor attn_weights_375_cast_fp16 = softmax(axis = var_6721, x = attn_weights_373_cast_fp16)[name = string("attn_weights_375_cast_fp16")]; bool var_6727_transpose_x_1 = const()[name = string("op_6727_transpose_x_1"), val = bool(true)]; bool var_6727_transpose_y_1 = const()[name = string("op_6727_transpose_y_1"), val = bool(false)]; tensor var_6727_cast_fp16 = matmul(transpose_x = var_6727_transpose_x_1, transpose_y = var_6727_transpose_y_1, x = attn_weights_375_cast_fp16, y = var_6711_cast_fp16_0)[name = string("op_6727_cast_fp16")]; bool attn_weights_377_transpose_x_0 = const()[name = string("attn_weights_377_transpose_x_0"), val = bool(false)]; bool attn_weights_377_transpose_y_0 = const()[name = string("attn_weights_377_transpose_y_0"), val = bool(false)]; tensor attn_weights_377_cast_fp16 = matmul(transpose_x = attn_weights_377_transpose_x_0, transpose_y = attn_weights_377_transpose_y_0, x = var_6701_cast_fp16_1, y = var_6714_cast_fp16_1)[name = string("attn_weights_377_cast_fp16")]; fp16 _inversed_attn_weights_379_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_379_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_379_cast_fp16 = mul(x = attn_weights_377_cast_fp16, y = _inversed_attn_weights_379_y_0_to_fp16)[name = string("_inversed_attn_weights_379_cast_fp16")]; tensor attn_weights_381_cast_fp16 = add(x = _inversed_attn_weights_379_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_381_cast_fp16")]; int32 var_6733 = const()[name = string("op_6733"), val = int32(2)]; tensor attn_weights_cast_fp16 = softmax(axis = var_6733, x = attn_weights_381_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_139_transpose_x_1 = const()[name = string("attn_output_139_transpose_x_1"), val = bool(true)]; bool attn_output_139_transpose_y_1 = const()[name = string("attn_output_139_transpose_y_1"), val = bool(false)]; tensor attn_output_139_cast_fp16 = matmul(transpose_x = attn_output_139_transpose_x_1, transpose_y = attn_output_139_transpose_y_1, x = attn_weights_cast_fp16, y = var_6711_cast_fp16_1)[name = string("attn_output_139_cast_fp16")]; int32 var_6741 = const()[name = string("op_6741"), val = int32(1)]; bool attn_output_141_interleave_0 = const()[name = string("attn_output_141_interleave_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = concat(axis = var_6741, interleave = attn_output_141_interleave_0, values = (var_6727_cast_fp16, attn_output_139_cast_fp16))[name = string("attn_output_141_cast_fp16")]; tensor var_6745_perm_0 = const()[name = string("op_6745_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 1024, 1, 64])]; tensor var_6745_cast_fp16 = transpose(perm = var_6745_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_0")]; tensor x_419_cast_fp16 = reshape(shape = var_6750, x = var_6745_cast_fp16)[name = string("x_419_cast_fp16")]; string hidden_states_141_pad_type_0 = const()[name = string("hidden_states_141_pad_type_0"), val = string("valid")]; tensor hidden_states_141_strides_0 = const()[name = string("hidden_states_141_strides_0"), val = tensor([1, 1])]; tensor hidden_states_141_pad_0 = const()[name = string("hidden_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_141_dilations_0 = const()[name = string("hidden_states_141_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_141_groups_0 = const()[name = string("hidden_states_141_groups_0"), val = int32(1)]; tensor var_6757_to_fp16 = const()[name = string("op_6757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698570240)))]; tensor hidden_states_141_cast_fp16 = conv(dilations = hidden_states_141_dilations_0, groups = hidden_states_141_groups_0, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = hidden_states_141_strides_0, weight = var_6757_to_fp16, x = x_419_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor x_421_cast_fp16 = add(x = x_413_cast_fp16, y = hidden_states_141_cast_fp16)[name = string("x_421_cast_fp16")]; int32 var_6769 = const()[name = string("op_6769"), val = int32(1)]; fp16 const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6772_cast_fp16 = mul(x = x_421_cast_fp16, y = const_243_promoted_to_fp16)[name = string("op_6772_cast_fp16")]; bool x_423_interleave_0 = const()[name = string("x_423_interleave_0"), val = bool(false)]; tensor x_423_cast_fp16 = concat(axis = var_6769, interleave = x_423_interleave_0, values = (x_421_cast_fp16, var_6772_cast_fp16))[name = string("x_423_cast_fp16")]; tensor out_283_axes_0 = const()[name = string("out_283_axes_0"), val = tensor([1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_283_cast_fp16 = layer_norm(axes = out_283_axes_0, epsilon = var_6782_to_fp16, x = x_423_cast_fp16)[name = string("out_283_cast_fp16")]; tensor layer_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700667456)))]; tensor out_285_cast_fp16 = mul(x = out_283_cast_fp16, y = layer_layers_23_post_attention_layernorm_weight_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_6788_split_sizes_0 = const()[name = string("op_6788_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6788_axis_0 = const()[name = string("op_6788_axis_0"), val = int32(1)]; tensor var_6788_cast_fp16_0, tensor var_6788_cast_fp16_1 = split(axis = var_6788_axis_0, split_sizes = var_6788_split_sizes_0, x = out_285_cast_fp16)[name = string("op_6788_cast_fp16")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor var_6793_to_fp16 = const()[name = string("op_6793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700671616)))]; tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_6793_to_fp16, x = var_6788_cast_fp16_0)[name = string("input_cast_fp16")]; tensor var_6804_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_6804_cast_fp16")]; string var_6809_pad_type_0 = const()[name = string("op_6809_pad_type_0"), val = string("valid")]; tensor var_6809_strides_0 = const()[name = string("op_6809_strides_0"), val = tensor([1, 1])]; tensor var_6809_pad_0 = const()[name = string("op_6809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6809_dilations_0 = const()[name = string("op_6809_dilations_0"), val = tensor([1, 1])]; int32 var_6809_groups_0 = const()[name = string("op_6809_groups_0"), val = int32(1)]; tensor var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709060288)))]; tensor var_6809_cast_fp16 = conv(dilations = var_6809_dilations_0, groups = var_6809_groups_0, pad = var_6809_pad_0, pad_type = var_6809_pad_type_0, strides = var_6809_strides_0, weight = var_6792_to_fp16, x = var_6788_cast_fp16_0)[name = string("op_6809_cast_fp16")]; tensor x_429_cast_fp16 = mul(x = var_6804_cast_fp16, y = var_6809_cast_fp16)[name = string("x_429_cast_fp16")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor var_6791_to_fp16 = const()[name = string("op_6791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717448960)))]; tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_6791_to_fp16, x = x_429_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor x_431_cast_fp16 = add(x = x_421_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_431_cast_fp16")]; int32 var_6822 = const()[name = string("op_6822"), val = int32(1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6825_cast_fp16 = mul(x = x_431_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_6825_cast_fp16")]; bool x_433_interleave_0 = const()[name = string("x_433_interleave_0"), val = bool(false)]; tensor x_433_cast_fp16 = concat(axis = var_6822, interleave = x_433_interleave_0, values = (x_431_cast_fp16, var_6825_cast_fp16))[name = string("x_433_cast_fp16")]; tensor out_289_axes_0 = const()[name = string("out_289_axes_0"), val = tensor([1])]; fp16 var_6835_to_fp16 = const()[name = string("op_6835_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_289_cast_fp16 = layer_norm(axes = out_289_axes_0, epsilon = var_6835_to_fp16, x = x_433_cast_fp16)[name = string("out_289_cast_fp16")]; tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725837632)))]; tensor out_291_cast_fp16 = mul(x = out_289_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_291_cast_fp16")]; tensor var_6841_split_sizes_0 = const()[name = string("op_6841_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6841_axis_0 = const()[name = string("op_6841_axis_0"), val = int32(1)]; tensor output, tensor var_6841_cast_fp16_1 = split(axis = var_6841_axis_0, split_sizes = var_6841_split_sizes_0, x = out_291_cast_fp16)[name = string("op_6841_cast_fp16")]; } -> (output); func length_8(tensor inputs_embeds, state> key_cache, tensor position_id, state> value_cache) { tensor var_260 = const()[name = string("op_260"), val = tensor([[0, 1, 2, 3, 4, 5, 6, 7]])]; tensor position_ids_1 = add(x = var_260, y = position_id)[name = string("position_ids_1")]; int32 cos_batch_dims_0 = const()[name = string("cos_batch_dims_0"), val = int32(0)]; bool cos_validate_indices_0 = const()[name = string("cos_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_cos_cached_to_fp16 = const()[name = string("layer_rope_emb_cos_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32768)]; tensor add_0 = add(x = position_ids_1, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids_1, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 cos_cast_fp16_axis_0 = const()[name = string("cos_cast_fp16_axis_0"), val = int32(0)]; tensor cos_cast_fp16 = gather(axis = cos_cast_fp16_axis_0, batch_dims = cos_batch_dims_0, indices = select_0, validate_indices = cos_validate_indices_0, x = layer_rope_emb_cos_cached_to_fp16)[name = string("cos_cast_fp16")]; int32 sin_batch_dims_0 = const()[name = string("sin_batch_dims_0"), val = int32(0)]; bool sin_validate_indices_0 = const()[name = string("sin_validate_indices_0"), val = bool(false)]; tensor layer_rope_emb_sin_cached_to_fp16 = const()[name = string("layer_rope_emb_sin_cached_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194432)))]; int32 sin_cast_fp16_axis_0 = const()[name = string("sin_cast_fp16_axis_0"), val = int32(0)]; tensor sin_cast_fp16 = gather(axis = sin_cast_fp16_axis_0, batch_dims = sin_batch_dims_0, indices = select_0, validate_indices = sin_validate_indices_0, x = layer_rope_emb_sin_cached_to_fp16)[name = string("sin_cast_fp16")]; tensor var_285 = const()[name = string("op_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388800)))]; tensor var_292_axes_0 = const()[name = string("op_292_axes_0"), val = tensor([2])]; tensor var_292 = expand_dims(axes = var_292_axes_0, x = position_ids_1)[name = string("op_292")]; tensor var_293 = greater(x = var_285, y = var_292)[name = string("op_293")]; tensor attention_mask_3_axes_0 = const()[name = string("attention_mask_3_axes_0"), val = tensor([1])]; string attention_mask_1_to_fp16_dtype_0 = const()[name = string("attention_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_293_to_fp16 = cast(dtype = attention_mask_1_to_fp16_dtype_0, x = var_293)[name = string("cast_245")]; tensor attention_mask_3_cast_fp16 = expand_dims(axes = attention_mask_3_axes_0, x = var_293_to_fp16)[name = string("attention_mask_3_cast_fp16")]; fp16 var_301_promoted_to_fp16 = const()[name = string("op_301_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_302_cast_fp16 = equal(x = attention_mask_3_cast_fp16, y = var_301_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_303_after_broadcast_to_fp16 = const()[name = string("op_303_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726366720)))]; tensor attention_mask_cast_fp16 = select(a = attention_mask_3_cast_fp16, b = var_303_after_broadcast_to_fp16, cond = var_302_cast_fp16)[name = string("attention_mask_cast_fp16")]; tensor cos_1_perm_0 = const()[name = string("cos_1_perm_0"), val = tensor([0, -1, -2])]; tensor sin_1_perm_0 = const()[name = string("sin_1_perm_0"), val = tensor([0, -1, -2])]; tensor attn_mask_perm_0 = const()[name = string("attn_mask_perm_0"), val = tensor([0, 1, -1, -2])]; int32 var_318 = const()[name = string("op_318"), val = int32(1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_321_cast_fp16 = mul(x = inputs_embeds, y = const_4_promoted_to_fp16)[name = string("op_321_cast_fp16")]; bool x_1_interleave_0 = const()[name = string("x_1_interleave_0"), val = bool(false)]; tensor x_1_cast_fp16 = concat(axis = var_318, interleave = x_1_interleave_0, values = (inputs_embeds, var_321_cast_fp16))[name = string("x_1_cast_fp16")]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_331_to_fp16, x = x_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor layer_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8401216)))]; tensor out_3_cast_fp16 = mul(x = out_1_cast_fp16, y = layer_layers_0_input_layernorm_weight_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)]; tensor var_337_cast_fp16_0, tensor var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = out_3_cast_fp16)[name = string("op_337_cast_fp16")]; tensor q_len_1 = const()[name = string("q_len_1"), val = tensor([8])]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor var_359_to_fp16 = const()[name = string("op_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8405376)))]; tensor query_states_1_cast_fp16 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = var_359_to_fp16, x = var_337_cast_fp16_0)[name = string("query_states_1_cast_fp16")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor var_370_to_fp16 = const()[name = string("op_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10502592)))]; tensor key_states_1_cast_fp16 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = var_370_to_fp16, x = var_337_cast_fp16_0)[name = string("key_states_1_cast_fp16")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10764800)))]; tensor value_states_1_cast_fp16 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = var_381_to_fp16, x = var_337_cast_fp16_0)[name = string("value_states_1_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 16, 64, 8])]; tensor embed_1_cast_fp16 = reshape(shape = var_389, x = query_states_1_cast_fp16)[name = string("embed_1_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 2, 64, 8])]; tensor var_394_cast_fp16 = reshape(shape = var_393, x = key_states_1_cast_fp16)[name = string("op_394_cast_fp16")]; tensor embed_3_perm_0 = const()[name = string("embed_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_400 = const()[name = string("op_400"), val = tensor([1, 2, 64, 8])]; tensor var_401_cast_fp16 = reshape(shape = var_400, x = value_states_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor([0, 1, 3, 2])]; tensor cos_1_cast_fp16 = transpose(perm = cos_1_perm_0, x = cos_cast_fp16)[name = string("transpose_74")]; tensor var_405_cast_fp16 = mul(x = embed_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_405_cast_fp16")]; tensor var_406_split_sizes_0 = const()[name = string("op_406_split_sizes_0"), val = tensor([32, 32])]; int32 var_406_axis_0 = const()[name = string("op_406_axis_0"), val = int32(-2)]; tensor var_406_cast_fp16_0, tensor var_406_cast_fp16_1 = split(axis = var_406_axis_0, split_sizes = var_406_split_sizes_0, x = embed_1_cast_fp16)[name = string("op_406_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_408_cast_fp16 = mul(x = var_406_cast_fp16_1, y = const_7_promoted_to_fp16)[name = string("op_408_cast_fp16")]; int32 var_410 = const()[name = string("op_410"), val = int32(-2)]; bool var_411_interleave_0 = const()[name = string("op_411_interleave_0"), val = bool(false)]; tensor var_411_cast_fp16 = concat(axis = var_410, interleave = var_411_interleave_0, values = (var_408_cast_fp16, var_406_cast_fp16_0))[name = string("op_411_cast_fp16")]; tensor sin_1_cast_fp16 = transpose(perm = sin_1_perm_0, x = sin_cast_fp16)[name = string("transpose_73")]; tensor var_412_cast_fp16 = mul(x = var_411_cast_fp16, y = sin_1_cast_fp16)[name = string("op_412_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_405_cast_fp16, y = var_412_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor embed_3_cast_fp16 = transpose(perm = embed_3_perm_0, x = var_394_cast_fp16)[name = string("transpose_71")]; tensor var_415_cast_fp16 = mul(x = embed_3_cast_fp16, y = cos_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_split_sizes_0 = const()[name = string("op_416_split_sizes_0"), val = tensor([32, 32])]; int32 var_416_axis_0 = const()[name = string("op_416_axis_0"), val = int32(-1)]; tensor var_416_cast_fp16_0, tensor var_416_cast_fp16_1 = split(axis = var_416_axis_0, split_sizes = var_416_split_sizes_0, x = embed_3_cast_fp16)[name = string("op_416_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_418_cast_fp16 = mul(x = var_416_cast_fp16_1, y = const_8_promoted_to_fp16)[name = string("op_418_cast_fp16")]; int32 var_420 = const()[name = string("op_420"), val = int32(-1)]; bool var_421_interleave_0 = const()[name = string("op_421_interleave_0"), val = bool(false)]; tensor var_421_cast_fp16 = concat(axis = var_420, interleave = var_421_interleave_0, values = (var_418_cast_fp16, var_416_cast_fp16_0))[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = mul(x = var_421_cast_fp16, y = sin_cast_fp16)[name = string("op_422_cast_fp16")]; tensor key_states_3_cast_fp16 = add(x = var_415_cast_fp16, y = var_422_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_426 = add(x = position_id, y = q_len_1)[name = string("op_426")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_1, expand_dims_2, position_id, concat_3_values3_0))[name = string("concat_3")]; tensor concat_4_values1_0 = const()[name = string("concat_4_values1_0"), val = tensor([0])]; tensor concat_4_values3_0 = const()[name = string("concat_4_values3_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (expand_dims_4, concat_4_values1_0, var_426, concat_4_values3_0))[name = string("concat_4")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_214_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_214")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_401_cast_fp16)[name = string("transpose_70")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_3, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_4, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = value_states_3_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_215_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_215")]; tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_48)[name = string("op_465_cast_fp16")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1])]; int32 var_468_axis_0 = const()[name = string("op_468_axis_0"), val = int32(1)]; tensor var_468_cast_fp16_0, tensor var_468_cast_fp16_1 = split(axis = var_468_axis_0, split_sizes = tile_0, x = var_465_cast_fp16)[name = string("op_468_cast_fp16")]; tensor var_475_begin_0 = const()[name = string("op_475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_475_end_0 = const()[name = string("op_475_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_475_end_mask_0 = const()[name = string("op_475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_475_cast_fp16 = slice_by_index(begin = var_475_begin_0, end = var_475_end_0, end_mask = var_475_end_mask_0, x = coreml_update_state_49)[name = string("op_475_cast_fp16")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1])]; int32 var_478_axis_0 = const()[name = string("op_478_axis_0"), val = int32(1)]; tensor var_478_cast_fp16_0, tensor var_478_cast_fp16_1 = split(axis = var_478_axis_0, split_sizes = tile_1, x = var_475_cast_fp16)[name = string("op_478_cast_fp16")]; tensor var_481_split_sizes_0 = const()[name = string("op_481_split_sizes_0"), val = tensor([8, 8])]; int32 var_481_axis_0 = const()[name = string("op_481_axis_0"), val = int32(1)]; tensor var_481_cast_fp16_0, tensor var_481_cast_fp16_1 = split(axis = var_481_axis_0, split_sizes = var_481_split_sizes_0, x = query_states_3_cast_fp16)[name = string("op_481_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_468_cast_fp16_0, y = var_481_cast_fp16_0)[name = string("attn_weights_1_cast_fp16")]; fp16 _inversed_attn_weights_3_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_3_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_3_cast_fp16 = mul(x = attn_weights_1_cast_fp16, y = _inversed_attn_weights_3_y_0_to_fp16)[name = string("_inversed_attn_weights_3_cast_fp16")]; tensor attn_mask_cast_fp16 = transpose(perm = attn_mask_perm_0, x = attention_mask_cast_fp16)[name = string("transpose_72")]; tensor attn_weights_5_cast_fp16 = add(x = _inversed_attn_weights_3_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; int32 var_488 = const()[name = string("op_488"), val = int32(2)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_488, x = attn_weights_5_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool var_494_transpose_x_1 = const()[name = string("op_494_transpose_x_1"), val = bool(true)]; bool var_494_transpose_y_1 = const()[name = string("op_494_transpose_y_1"), val = bool(false)]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_1, transpose_y = var_494_transpose_y_1, x = attn_weights_7_cast_fp16, y = var_478_cast_fp16_0)[name = string("op_494_cast_fp16")]; bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_468_cast_fp16_1, y = var_481_cast_fp16_1)[name = string("attn_weights_9_cast_fp16")]; fp16 _inversed_attn_weights_11_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_11_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_11_cast_fp16 = mul(x = attn_weights_9_cast_fp16, y = _inversed_attn_weights_11_y_0_to_fp16)[name = string("_inversed_attn_weights_11_cast_fp16")]; tensor attn_weights_13_cast_fp16 = add(x = _inversed_attn_weights_11_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; int32 var_500 = const()[name = string("op_500"), val = int32(2)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_500, x = attn_weights_13_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(true)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_15_cast_fp16, y = var_478_cast_fp16_1)[name = string("attn_output_1_cast_fp16")]; int32 var_508 = const()[name = string("op_508"), val = int32(1)]; bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)]; tensor attn_output_3_cast_fp16 = concat(axis = var_508, interleave = attn_output_3_interleave_0, values = (var_494_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")]; tensor var_512_perm_0 = const()[name = string("op_512_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_517 = const()[name = string("op_517"), val = tensor([1, 1024, 1, 8])]; tensor var_512_cast_fp16 = transpose(perm = var_512_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_69")]; tensor x_5_cast_fp16 = reshape(shape = var_517, x = var_512_cast_fp16)[name = string("x_5_cast_fp16")]; string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("valid")]; tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([1, 1])]; tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11027008)))]; tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = var_524_to_fp16, x = x_5_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor x_7_cast_fp16 = add(x = inputs_embeds, y = hidden_states_3_cast_fp16)[name = string("x_7_cast_fp16")]; int32 var_536 = const()[name = string("op_536"), val = int32(1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_539_cast_fp16 = mul(x = x_7_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_539_cast_fp16")]; bool x_9_interleave_0 = const()[name = string("x_9_interleave_0"), val = bool(false)]; tensor x_9_cast_fp16 = concat(axis = var_536, interleave = x_9_interleave_0, values = (x_7_cast_fp16, var_539_cast_fp16))[name = string("x_9_cast_fp16")]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_549_to_fp16 = const()[name = string("op_549_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_549_to_fp16, x = x_9_cast_fp16)[name = string("out_7_cast_fp16")]; tensor layer_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13124224)))]; tensor out_9_cast_fp16 = mul(x = out_7_cast_fp16, y = layer_layers_0_post_attention_layernorm_weight_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(1)]; tensor var_555_cast_fp16_0, tensor var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = out_9_cast_fp16)[name = string("op_555_cast_fp16")]; string input_1_pad_type_0 = const()[name = string("input_1_pad_type_0"), val = string("valid")]; tensor input_1_strides_0 = const()[name = string("input_1_strides_0"), val = tensor([1, 1])]; tensor input_1_pad_0 = const()[name = string("input_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_1_dilations_0 = const()[name = string("input_1_dilations_0"), val = tensor([1, 1])]; int32 input_1_groups_0 = const()[name = string("input_1_groups_0"), val = int32(1)]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13128384)))]; tensor input_1_cast_fp16 = conv(dilations = input_1_dilations_0, groups = input_1_groups_0, pad = input_1_pad_0, pad_type = input_1_pad_type_0, strides = input_1_strides_0, weight = var_560_to_fp16, x = var_555_cast_fp16_0)[name = string("input_1_cast_fp16")]; tensor var_571_cast_fp16 = silu(x = input_1_cast_fp16)[name = string("op_571_cast_fp16")]; string var_576_pad_type_0 = const()[name = string("op_576_pad_type_0"), val = string("valid")]; tensor var_576_strides_0 = const()[name = string("op_576_strides_0"), val = tensor([1, 1])]; tensor var_576_pad_0 = const()[name = string("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_dilations_0 = const()[name = string("op_576_dilations_0"), val = tensor([1, 1])]; int32 var_576_groups_0 = const()[name = string("op_576_groups_0"), val = int32(1)]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21517056)))]; tensor var_576_cast_fp16 = conv(dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = var_559_to_fp16, x = var_555_cast_fp16_0)[name = string("op_576_cast_fp16")]; tensor x_15_cast_fp16 = mul(x = var_571_cast_fp16, y = var_576_cast_fp16)[name = string("x_15_cast_fp16")]; string hidden_states_5_pad_type_0 = const()[name = string("hidden_states_5_pad_type_0"), val = string("valid")]; tensor hidden_states_5_strides_0 = const()[name = string("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = string("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = string("hidden_states_5_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_5_groups_0 = const()[name = string("hidden_states_5_groups_0"), val = int32(1)]; tensor var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29905728)))]; tensor hidden_states_5_cast_fp16 = conv(dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = var_558_to_fp16, x = x_15_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_589 = const()[name = string("op_589"), val = int32(1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_592_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_592_cast_fp16")]; bool x_19_interleave_0 = const()[name = string("x_19_interleave_0"), val = bool(false)]; tensor x_19_cast_fp16 = concat(axis = var_589, interleave = x_19_interleave_0, values = (x_17_cast_fp16, var_592_cast_fp16))[name = string("x_19_cast_fp16")]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_602_to_fp16 = const()[name = string("op_602_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_602_to_fp16, x = x_19_cast_fp16)[name = string("out_13_cast_fp16")]; tensor layer_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38294400)))]; tensor out_15_cast_fp16 = mul(x = out_13_cast_fp16, y = layer_layers_1_input_layernorm_weight_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_608_split_sizes_0 = const()[name = string("op_608_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_608_axis_0 = const()[name = string("op_608_axis_0"), val = int32(1)]; tensor var_608_cast_fp16_0, tensor var_608_cast_fp16_1 = split(axis = var_608_axis_0, split_sizes = var_608_split_sizes_0, x = out_15_cast_fp16)[name = string("op_608_cast_fp16")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38298560)))]; tensor query_states_5_cast_fp16 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = var_630_to_fp16, x = var_608_cast_fp16_0)[name = string("query_states_5_cast_fp16")]; string key_states_5_pad_type_0 = const()[name = string("key_states_5_pad_type_0"), val = string("valid")]; tensor key_states_5_strides_0 = const()[name = string("key_states_5_strides_0"), val = tensor([1, 1])]; tensor key_states_5_pad_0 = const()[name = string("key_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_5_dilations_0 = const()[name = string("key_states_5_dilations_0"), val = tensor([1, 1])]; int32 key_states_5_groups_0 = const()[name = string("key_states_5_groups_0"), val = int32(1)]; tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40395776)))]; tensor key_states_5_cast_fp16 = conv(dilations = key_states_5_dilations_0, groups = key_states_5_groups_0, pad = key_states_5_pad_0, pad_type = key_states_5_pad_type_0, strides = key_states_5_strides_0, weight = var_641_to_fp16, x = var_608_cast_fp16_0)[name = string("key_states_5_cast_fp16")]; string value_states_5_pad_type_0 = const()[name = string("value_states_5_pad_type_0"), val = string("valid")]; tensor value_states_5_strides_0 = const()[name = string("value_states_5_strides_0"), val = tensor([1, 1])]; tensor value_states_5_pad_0 = const()[name = string("value_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_5_dilations_0 = const()[name = string("value_states_5_dilations_0"), val = tensor([1, 1])]; int32 value_states_5_groups_0 = const()[name = string("value_states_5_groups_0"), val = int32(1)]; tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40657984)))]; tensor value_states_5_cast_fp16 = conv(dilations = value_states_5_dilations_0, groups = value_states_5_groups_0, pad = value_states_5_pad_0, pad_type = value_states_5_pad_type_0, strides = value_states_5_strides_0, weight = var_652_to_fp16, x = var_608_cast_fp16_0)[name = string("value_states_5_cast_fp16")]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 64, 8])]; tensor embed_5_cast_fp16 = reshape(shape = var_660, x = query_states_5_cast_fp16)[name = string("embed_5_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 2, 64, 8])]; tensor var_665_cast_fp16 = reshape(shape = var_664, x = key_states_5_cast_fp16)[name = string("op_665_cast_fp16")]; tensor embed_7_perm_0 = const()[name = string("embed_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 2, 64, 8])]; tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_states_5_cast_fp16)[name = string("op_672_cast_fp16")]; tensor value_states_7_perm_0 = const()[name = string("value_states_7_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_676_cast_fp16 = mul(x = embed_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_677_split_sizes_0 = const()[name = string("op_677_split_sizes_0"), val = tensor([32, 32])]; int32 var_677_axis_0 = const()[name = string("op_677_axis_0"), val = int32(-2)]; tensor var_677_cast_fp16_0, tensor var_677_cast_fp16_1 = split(axis = var_677_axis_0, split_sizes = var_677_split_sizes_0, x = embed_5_cast_fp16)[name = string("op_677_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_679_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-2)]; bool var_682_interleave_0 = const()[name = string("op_682_interleave_0"), val = bool(false)]; tensor var_682_cast_fp16 = concat(axis = var_681, interleave = var_682_interleave_0, values = (var_679_cast_fp16, var_677_cast_fp16_0))[name = string("op_682_cast_fp16")]; tensor var_683_cast_fp16 = mul(x = var_682_cast_fp16, y = sin_1_cast_fp16)[name = string("op_683_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_676_cast_fp16, y = var_683_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor embed_7_cast_fp16 = transpose(perm = embed_7_perm_0, x = var_665_cast_fp16)[name = string("transpose_68")]; tensor var_686_cast_fp16 = mul(x = embed_7_cast_fp16, y = cos_cast_fp16)[name = string("op_686_cast_fp16")]; tensor var_687_split_sizes_0 = const()[name = string("op_687_split_sizes_0"), val = tensor([32, 32])]; int32 var_687_axis_0 = const()[name = string("op_687_axis_0"), val = int32(-1)]; tensor var_687_cast_fp16_0, tensor var_687_cast_fp16_1 = split(axis = var_687_axis_0, split_sizes = var_687_split_sizes_0, x = embed_7_cast_fp16)[name = string("op_687_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_689_cast_fp16 = mul(x = var_687_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_689_cast_fp16")]; int32 var_691 = const()[name = string("op_691"), val = int32(-1)]; bool var_692_interleave_0 = const()[name = string("op_692_interleave_0"), val = bool(false)]; tensor var_692_cast_fp16 = concat(axis = var_691, interleave = var_692_interleave_0, values = (var_689_cast_fp16, var_687_cast_fp16_0))[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = mul(x = var_692_cast_fp16, y = sin_cast_fp16)[name = string("op_693_cast_fp16")]; tensor key_states_7_cast_fp16 = add(x = var_686_cast_fp16, y = var_693_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor expand_dims_11 = const()[name = string("expand_dims_11"), val = tensor([1])]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_14 = const()[name = string("expand_dims_14"), val = tensor([2])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_11, expand_dims_12, position_id, concat_11_values3_0))[name = string("concat_11")]; tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (expand_dims_14, concat_12_values1_0, var_426, concat_12_values3_0))[name = string("concat_12")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = key_states_7_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_216_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_216")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_7_cast_fp16 = transpose(perm = value_states_7_perm_0, x = var_672_cast_fp16)[name = string("transpose_67")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = value_states_7_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_217_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_217")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_50)[name = string("op_736_cast_fp16")]; tensor tile_2 = const()[name = string("tile_2"), val = tensor([1, 1])]; int32 var_739_axis_0 = const()[name = string("op_739_axis_0"), val = int32(1)]; tensor var_739_cast_fp16_0, tensor var_739_cast_fp16_1 = split(axis = var_739_axis_0, split_sizes = tile_2, x = var_736_cast_fp16)[name = string("op_739_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([2, 2, 2048, 64])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = coreml_update_state_51)[name = string("op_746_cast_fp16")]; tensor tile_3 = const()[name = string("tile_3"), val = tensor([1, 1])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = tile_3, x = var_746_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_752_split_sizes_0 = const()[name = string("op_752_split_sizes_0"), val = tensor([8, 8])]; int32 var_752_axis_0 = const()[name = string("op_752_axis_0"), val = int32(1)]; tensor var_752_cast_fp16_0, tensor var_752_cast_fp16_1 = split(axis = var_752_axis_0, split_sizes = var_752_split_sizes_0, x = query_states_7_cast_fp16)[name = string("op_752_cast_fp16")]; bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = var_739_cast_fp16_0, y = var_752_cast_fp16_0)[name = string("attn_weights_17_cast_fp16")]; fp16 _inversed_attn_weights_19_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_19_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_19_cast_fp16 = mul(x = attn_weights_17_cast_fp16, y = _inversed_attn_weights_19_y_0_to_fp16)[name = string("_inversed_attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = _inversed_attn_weights_19_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; int32 var_759 = const()[name = string("op_759"), val = int32(2)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_759, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool var_765_transpose_x_1 = const()[name = string("op_765_transpose_x_1"), val = bool(true)]; bool var_765_transpose_y_1 = const()[name = string("op_765_transpose_y_1"), val = bool(false)]; tensor var_765_cast_fp16 = matmul(transpose_x = var_765_transpose_x_1, transpose_y = var_765_transpose_y_1, x = attn_weights_23_cast_fp16, y = var_749_cast_fp16_0)[name = string("op_765_cast_fp16")]; bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = var_739_cast_fp16_1, y = var_752_cast_fp16_1)[name = string("attn_weights_25_cast_fp16")]; fp16 _inversed_attn_weights_27_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_27_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_27_cast_fp16 = mul(x = attn_weights_25_cast_fp16, y = _inversed_attn_weights_27_y_0_to_fp16)[name = string("_inversed_attn_weights_27_cast_fp16")]; tensor attn_weights_29_cast_fp16 = add(x = _inversed_attn_weights_27_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; int32 var_771 = const()[name = string("op_771"), val = int32(2)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_771, x = attn_weights_29_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_7_transpose_x_1 = const()[name = string("attn_output_7_transpose_x_1"), val = bool(true)]; bool attn_output_7_transpose_y_1 = const()[name = string("attn_output_7_transpose_y_1"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_1, transpose_y = attn_output_7_transpose_y_1, x = attn_weights_31_cast_fp16, y = var_749_cast_fp16_1)[name = string("attn_output_7_cast_fp16")]; int32 var_779 = const()[name = string("op_779"), val = int32(1)]; bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = concat(axis = var_779, interleave = attn_output_9_interleave_0, values = (var_765_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")]; tensor var_783_perm_0 = const()[name = string("op_783_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1024, 1, 8])]; tensor var_783_cast_fp16 = transpose(perm = var_783_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_66")]; tensor x_23_cast_fp16 = reshape(shape = var_788, x = var_783_cast_fp16)[name = string("x_23_cast_fp16")]; string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40920192)))]; tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = var_795_to_fp16, x = x_23_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_807 = const()[name = string("op_807"), val = int32(1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool x_27_interleave_0 = const()[name = string("x_27_interleave_0"), val = bool(false)]; tensor x_27_cast_fp16 = concat(axis = var_807, interleave = x_27_interleave_0, values = (x_25_cast_fp16, var_810_cast_fp16))[name = string("x_27_cast_fp16")]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_820_to_fp16, x = x_27_cast_fp16)[name = string("out_19_cast_fp16")]; tensor layer_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43017408)))]; tensor out_21_cast_fp16 = mul(x = out_19_cast_fp16, y = layer_layers_1_post_attention_layernorm_weight_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_826_split_sizes_0 = const()[name = string("op_826_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_826_axis_0 = const()[name = string("op_826_axis_0"), val = int32(1)]; tensor var_826_cast_fp16_0, tensor var_826_cast_fp16_1 = split(axis = var_826_axis_0, split_sizes = var_826_split_sizes_0, x = out_21_cast_fp16)[name = string("op_826_cast_fp16")]; string input_3_pad_type_0 = const()[name = string("input_3_pad_type_0"), val = string("valid")]; tensor input_3_strides_0 = const()[name = string("input_3_strides_0"), val = tensor([1, 1])]; tensor input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_3_dilations_0 = const()[name = string("input_3_dilations_0"), val = tensor([1, 1])]; int32 input_3_groups_0 = const()[name = string("input_3_groups_0"), val = int32(1)]; tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43021568)))]; tensor input_3_cast_fp16 = conv(dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = var_831_to_fp16, x = var_826_cast_fp16_0)[name = string("input_3_cast_fp16")]; tensor var_842_cast_fp16 = silu(x = input_3_cast_fp16)[name = string("op_842_cast_fp16")]; string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")]; tensor var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor([1, 1])]; tensor var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor([1, 1])]; int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)]; tensor var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51410240)))]; tensor var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = var_830_to_fp16, x = var_826_cast_fp16_0)[name = string("op_847_cast_fp16")]; tensor x_33_cast_fp16 = mul(x = var_842_cast_fp16, y = var_847_cast_fp16)[name = string("x_33_cast_fp16")]; string hidden_states_11_pad_type_0 = const()[name = string("hidden_states_11_pad_type_0"), val = string("valid")]; tensor hidden_states_11_strides_0 = const()[name = string("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = string("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = string("hidden_states_11_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_11_groups_0 = const()[name = string("hidden_states_11_groups_0"), val = int32(1)]; tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912)))]; tensor hidden_states_11_cast_fp16 = conv(dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = var_829_to_fp16, x = x_33_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor x_35_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("x_35_cast_fp16")]; int32 var_860 = const()[name = string("op_860"), val = int32(1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_863_cast_fp16 = mul(x = x_35_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_863_cast_fp16")]; bool x_37_interleave_0 = const()[name = string("x_37_interleave_0"), val = bool(false)]; tensor x_37_cast_fp16 = concat(axis = var_860, interleave = x_37_interleave_0, values = (x_35_cast_fp16, var_863_cast_fp16))[name = string("x_37_cast_fp16")]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_873_to_fp16, x = x_37_cast_fp16)[name = string("out_25_cast_fp16")]; tensor layer_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68187584)))]; tensor out_27_cast_fp16 = mul(x = out_25_cast_fp16, y = layer_layers_2_input_layernorm_weight_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(1)]; tensor var_879_cast_fp16_0, tensor var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = out_27_cast_fp16)[name = string("op_879_cast_fp16")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68191744)))]; tensor query_states_9_cast_fp16 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = var_901_to_fp16, x = var_879_cast_fp16_0)[name = string("query_states_9_cast_fp16")]; string key_states_9_pad_type_0 = const()[name = string("key_states_9_pad_type_0"), val = string("valid")]; tensor key_states_9_strides_0 = const()[name = string("key_states_9_strides_0"), val = tensor([1, 1])]; tensor key_states_9_pad_0 = const()[name = string("key_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_9_dilations_0 = const()[name = string("key_states_9_dilations_0"), val = tensor([1, 1])]; int32 key_states_9_groups_0 = const()[name = string("key_states_9_groups_0"), val = int32(1)]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70288960)))]; tensor key_states_9_cast_fp16 = conv(dilations = key_states_9_dilations_0, groups = key_states_9_groups_0, pad = key_states_9_pad_0, pad_type = key_states_9_pad_type_0, strides = key_states_9_strides_0, weight = var_912_to_fp16, x = var_879_cast_fp16_0)[name = string("key_states_9_cast_fp16")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70551168)))]; tensor value_states_9_cast_fp16 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = var_923_to_fp16, x = var_879_cast_fp16_0)[name = string("value_states_9_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 16, 64, 8])]; tensor embed_9_cast_fp16 = reshape(shape = var_931, x = query_states_9_cast_fp16)[name = string("embed_9_cast_fp16")]; tensor var_935 = const()[name = string("op_935"), val = tensor([1, 2, 64, 8])]; tensor var_936_cast_fp16 = reshape(shape = var_935, x = key_states_9_cast_fp16)[name = string("op_936_cast_fp16")]; tensor embed_11_perm_0 = const()[name = string("embed_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 2, 64, 8])]; tensor var_943_cast_fp16 = reshape(shape = var_942, x = value_states_9_cast_fp16)[name = string("op_943_cast_fp16")]; tensor value_states_11_perm_0 = const()[name = string("value_states_11_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_947_cast_fp16 = mul(x = embed_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_947_cast_fp16")]; tensor var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor([32, 32])]; int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-2)]; tensor var_948_cast_fp16_0, tensor var_948_cast_fp16_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = embed_9_cast_fp16)[name = string("op_948_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_950_cast_fp16 = mul(x = var_948_cast_fp16_1, y = const_27_promoted_to_fp16)[name = string("op_950_cast_fp16")]; int32 var_952 = const()[name = string("op_952"), val = int32(-2)]; bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)]; tensor var_953_cast_fp16 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950_cast_fp16, var_948_cast_fp16_0))[name = string("op_953_cast_fp16")]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = sin_1_cast_fp16)[name = string("op_954_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor embed_11_cast_fp16 = transpose(perm = embed_11_perm_0, x = var_936_cast_fp16)[name = string("transpose_65")]; tensor var_957_cast_fp16 = mul(x = embed_11_cast_fp16, y = cos_cast_fp16)[name = string("op_957_cast_fp16")]; tensor var_958_split_sizes_0 = const()[name = string("op_958_split_sizes_0"), val = tensor([32, 32])]; int32 var_958_axis_0 = const()[name = string("op_958_axis_0"), val = int32(-1)]; tensor var_958_cast_fp16_0, tensor var_958_cast_fp16_1 = split(axis = var_958_axis_0, split_sizes = var_958_split_sizes_0, x = embed_11_cast_fp16)[name = string("op_958_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = var_958_cast_fp16_1, y = const_28_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool var_963_interleave_0 = const()[name = string("op_963_interleave_0"), val = bool(false)]; tensor var_963_cast_fp16 = concat(axis = var_962, interleave = var_963_interleave_0, values = (var_960_cast_fp16, var_958_cast_fp16_0))[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = var_963_cast_fp16, y = sin_cast_fp16)[name = string("op_964_cast_fp16")]; tensor key_states_11_cast_fp16 = add(x = var_957_cast_fp16, y = var_964_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([2])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([0])]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([3])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_21, expand_dims_22, position_id, concat_19_values3_0))[name = string("concat_19")]; tensor concat_20_values1_0 = const()[name = string("concat_20_values1_0"), val = tensor([0])]; tensor concat_20_values3_0 = const()[name = string("concat_20_values3_0"), val = tensor([0])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_24, concat_20_values1_0, var_426, concat_20_values3_0))[name = string("concat_20")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = key_states_11_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_218_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_218")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11_cast_fp16 = transpose(perm = value_states_11_perm_0, x = var_943_cast_fp16)[name = string("transpose_64")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_19, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_20, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = value_states_11_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_219_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_219")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = coreml_update_state_52)[name = string("op_1007_cast_fp16")]; tensor tile_4 = const()[name = string("tile_4"), val = tensor([1, 1])]; int32 var_1010_axis_0 = const()[name = string("op_1010_axis_0"), val = int32(1)]; tensor var_1010_cast_fp16_0, tensor var_1010_cast_fp16_1 = split(axis = var_1010_axis_0, split_sizes = tile_4, x = var_1007_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = string("op_1017_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_1017_end_0 = const()[name = string("op_1017_end_0"), val = tensor([3, 2, 2048, 64])]; tensor var_1017_end_mask_0 = const()[name = string("op_1017_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = coreml_update_state_53)[name = string("op_1017_cast_fp16")]; tensor tile_5 = const()[name = string("tile_5"), val = tensor([1, 1])]; int32 var_1020_axis_0 = const()[name = string("op_1020_axis_0"), val = int32(1)]; tensor var_1020_cast_fp16_0, tensor var_1020_cast_fp16_1 = split(axis = var_1020_axis_0, split_sizes = tile_5, x = var_1017_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1023_split_sizes_0 = const()[name = string("op_1023_split_sizes_0"), val = tensor([8, 8])]; int32 var_1023_axis_0 = const()[name = string("op_1023_axis_0"), val = int32(1)]; tensor var_1023_cast_fp16_0, tensor var_1023_cast_fp16_1 = split(axis = var_1023_axis_0, split_sizes = var_1023_split_sizes_0, x = query_states_11_cast_fp16)[name = string("op_1023_cast_fp16")]; bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = var_1010_cast_fp16_0, y = var_1023_cast_fp16_0)[name = string("attn_weights_33_cast_fp16")]; fp16 _inversed_attn_weights_35_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_35_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_35_cast_fp16 = mul(x = attn_weights_33_cast_fp16, y = _inversed_attn_weights_35_y_0_to_fp16)[name = string("_inversed_attn_weights_35_cast_fp16")]; tensor attn_weights_37_cast_fp16 = add(x = _inversed_attn_weights_35_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; int32 var_1030 = const()[name = string("op_1030"), val = int32(2)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_1030, x = attn_weights_37_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool var_1036_transpose_x_1 = const()[name = string("op_1036_transpose_x_1"), val = bool(true)]; bool var_1036_transpose_y_1 = const()[name = string("op_1036_transpose_y_1"), val = bool(false)]; tensor var_1036_cast_fp16 = matmul(transpose_x = var_1036_transpose_x_1, transpose_y = var_1036_transpose_y_1, x = attn_weights_39_cast_fp16, y = var_1020_cast_fp16_0)[name = string("op_1036_cast_fp16")]; bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = var_1010_cast_fp16_1, y = var_1023_cast_fp16_1)[name = string("attn_weights_41_cast_fp16")]; fp16 _inversed_attn_weights_43_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_43_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_43_cast_fp16 = mul(x = attn_weights_41_cast_fp16, y = _inversed_attn_weights_43_y_0_to_fp16)[name = string("_inversed_attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = _inversed_attn_weights_43_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; int32 var_1042 = const()[name = string("op_1042"), val = int32(2)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_1042, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(true)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_47_cast_fp16, y = var_1020_cast_fp16_1)[name = string("attn_output_13_cast_fp16")]; int32 var_1050 = const()[name = string("op_1050"), val = int32(1)]; bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)]; tensor attn_output_15_cast_fp16 = concat(axis = var_1050, interleave = attn_output_15_interleave_0, values = (var_1036_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")]; tensor var_1054_perm_0 = const()[name = string("op_1054_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1059 = const()[name = string("op_1059"), val = tensor([1, 1024, 1, 8])]; tensor var_1054_cast_fp16 = transpose(perm = var_1054_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_63")]; tensor x_41_cast_fp16 = reshape(shape = var_1059, x = var_1054_cast_fp16)[name = string("x_41_cast_fp16")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70813376)))]; tensor hidden_states_15_cast_fp16 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = var_1066_to_fp16, x = x_41_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_35_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_1078 = const()[name = string("op_1078"), val = int32(1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1081_cast_fp16 = mul(x = x_43_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_1081_cast_fp16")]; bool x_45_interleave_0 = const()[name = string("x_45_interleave_0"), val = bool(false)]; tensor x_45_cast_fp16 = concat(axis = var_1078, interleave = x_45_interleave_0, values = (x_43_cast_fp16, var_1081_cast_fp16))[name = string("x_45_cast_fp16")]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1091_to_fp16, x = x_45_cast_fp16)[name = string("out_31_cast_fp16")]; tensor layer_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910592)))]; tensor out_33_cast_fp16 = mul(x = out_31_cast_fp16, y = layer_layers_2_post_attention_layernorm_weight_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(1)]; tensor var_1097_cast_fp16_0, tensor var_1097_cast_fp16_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = out_33_cast_fp16)[name = string("op_1097_cast_fp16")]; string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72914752)))]; tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = var_1102_to_fp16, x = var_1097_cast_fp16_0)[name = string("input_5_cast_fp16")]; tensor var_1113_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_1113_cast_fp16")]; string var_1118_pad_type_0 = const()[name = string("op_1118_pad_type_0"), val = string("valid")]; tensor var_1118_strides_0 = const()[name = string("op_1118_strides_0"), val = tensor([1, 1])]; tensor var_1118_pad_0 = const()[name = string("op_1118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1118_dilations_0 = const()[name = string("op_1118_dilations_0"), val = tensor([1, 1])]; int32 var_1118_groups_0 = const()[name = string("op_1118_groups_0"), val = int32(1)]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81303424)))]; tensor var_1118_cast_fp16 = conv(dilations = var_1118_dilations_0, groups = var_1118_groups_0, pad = var_1118_pad_0, pad_type = var_1118_pad_type_0, strides = var_1118_strides_0, weight = var_1101_to_fp16, x = var_1097_cast_fp16_0)[name = string("op_1118_cast_fp16")]; tensor x_51_cast_fp16 = mul(x = var_1113_cast_fp16, y = var_1118_cast_fp16)[name = string("x_51_cast_fp16")]; string hidden_states_17_pad_type_0 = const()[name = string("hidden_states_17_pad_type_0"), val = string("valid")]; tensor hidden_states_17_strides_0 = const()[name = string("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = string("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = string("hidden_states_17_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_17_groups_0 = const()[name = string("hidden_states_17_groups_0"), val = int32(1)]; tensor var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89692096)))]; tensor hidden_states_17_cast_fp16 = conv(dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = var_1100_to_fp16, x = x_51_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor x_53_cast_fp16 = add(x = x_43_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("x_53_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(1)]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1134_cast_fp16 = mul(x = x_53_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1134_cast_fp16")]; bool x_55_interleave_0 = const()[name = string("x_55_interleave_0"), val = bool(false)]; tensor x_55_cast_fp16 = concat(axis = var_1131, interleave = x_55_interleave_0, values = (x_53_cast_fp16, var_1134_cast_fp16))[name = string("x_55_cast_fp16")]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1144_to_fp16 = const()[name = string("op_1144_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1144_to_fp16, x = x_55_cast_fp16)[name = string("out_37_cast_fp16")]; tensor layer_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98080768)))]; tensor out_39_cast_fp16 = mul(x = out_37_cast_fp16, y = layer_layers_3_input_layernorm_weight_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_1150_split_sizes_0 = const()[name = string("op_1150_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1150_axis_0 = const()[name = string("op_1150_axis_0"), val = int32(1)]; tensor var_1150_cast_fp16_0, tensor var_1150_cast_fp16_1 = split(axis = var_1150_axis_0, split_sizes = var_1150_split_sizes_0, x = out_39_cast_fp16)[name = string("op_1150_cast_fp16")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98084928)))]; tensor query_states_13_cast_fp16 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = var_1172_to_fp16, x = var_1150_cast_fp16_0)[name = string("query_states_13_cast_fp16")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100182144)))]; tensor key_states_13_cast_fp16 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = var_1183_to_fp16, x = var_1150_cast_fp16_0)[name = string("key_states_13_cast_fp16")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100444352)))]; tensor value_states_13_cast_fp16 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = var_1194_to_fp16, x = var_1150_cast_fp16_0)[name = string("value_states_13_cast_fp16")]; tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 16, 64, 8])]; tensor embed_13_cast_fp16 = reshape(shape = var_1202, x = query_states_13_cast_fp16)[name = string("embed_13_cast_fp16")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 2, 64, 8])]; tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = key_states_13_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor embed_15_perm_0 = const()[name = string("embed_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1213 = const()[name = string("op_1213"), val = tensor([1, 2, 64, 8])]; tensor var_1214_cast_fp16 = reshape(shape = var_1213, x = value_states_13_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1218_cast_fp16 = mul(x = embed_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1218_cast_fp16")]; tensor var_1219_split_sizes_0 = const()[name = string("op_1219_split_sizes_0"), val = tensor([32, 32])]; int32 var_1219_axis_0 = const()[name = string("op_1219_axis_0"), val = int32(-2)]; tensor var_1219_cast_fp16_0, tensor var_1219_cast_fp16_1 = split(axis = var_1219_axis_0, split_sizes = var_1219_split_sizes_0, x = embed_13_cast_fp16)[name = string("op_1219_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1221_cast_fp16 = mul(x = var_1219_cast_fp16_1, y = const_37_promoted_to_fp16)[name = string("op_1221_cast_fp16")]; int32 var_1223 = const()[name = string("op_1223"), val = int32(-2)]; bool var_1224_interleave_0 = const()[name = string("op_1224_interleave_0"), val = bool(false)]; tensor var_1224_cast_fp16 = concat(axis = var_1223, interleave = var_1224_interleave_0, values = (var_1221_cast_fp16, var_1219_cast_fp16_0))[name = string("op_1224_cast_fp16")]; tensor var_1225_cast_fp16 = mul(x = var_1224_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1225_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor embed_15_cast_fp16 = transpose(perm = embed_15_perm_0, x = var_1207_cast_fp16)[name = string("transpose_62")]; tensor var_1228_cast_fp16 = mul(x = embed_15_cast_fp16, y = cos_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor var_1229_split_sizes_0 = const()[name = string("op_1229_split_sizes_0"), val = tensor([32, 32])]; int32 var_1229_axis_0 = const()[name = string("op_1229_axis_0"), val = int32(-1)]; tensor var_1229_cast_fp16_0, tensor var_1229_cast_fp16_1 = split(axis = var_1229_axis_0, split_sizes = var_1229_split_sizes_0, x = embed_15_cast_fp16)[name = string("op_1229_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1231_cast_fp16 = mul(x = var_1229_cast_fp16_1, y = const_38_promoted_to_fp16)[name = string("op_1231_cast_fp16")]; int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; bool var_1234_interleave_0 = const()[name = string("op_1234_interleave_0"), val = bool(false)]; tensor var_1234_cast_fp16 = concat(axis = var_1233, interleave = var_1234_interleave_0, values = (var_1231_cast_fp16, var_1229_cast_fp16_0))[name = string("op_1234_cast_fp16")]; tensor var_1235_cast_fp16 = mul(x = var_1234_cast_fp16, y = sin_cast_fp16)[name = string("op_1235_cast_fp16")]; tensor key_states_15_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1235_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([3])]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([4])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_31, expand_dims_32, position_id, concat_27_values3_0))[name = string("concat_27")]; tensor concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = tensor([0])]; tensor concat_28_values3_0 = const()[name = string("concat_28_values3_0"), val = tensor([0])]; int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (expand_dims_34, concat_28_values1_0, var_426, concat_28_values3_0))[name = string("concat_28")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = key_states_15_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_220_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_220")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_1214_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_27, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_28, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = value_states_15_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_221_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_221")]; tensor var_1278_begin_0 = const()[name = string("op_1278_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1278_end_0 = const()[name = string("op_1278_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1278_end_mask_0 = const()[name = string("op_1278_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1278_cast_fp16 = slice_by_index(begin = var_1278_begin_0, end = var_1278_end_0, end_mask = var_1278_end_mask_0, x = coreml_update_state_54)[name = string("op_1278_cast_fp16")]; tensor tile_6 = const()[name = string("tile_6"), val = tensor([1, 1])]; int32 var_1281_axis_0 = const()[name = string("op_1281_axis_0"), val = int32(1)]; tensor var_1281_cast_fp16_0, tensor var_1281_cast_fp16_1 = split(axis = var_1281_axis_0, split_sizes = tile_6, x = var_1278_cast_fp16)[name = string("op_1281_cast_fp16")]; tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([4, 2, 2048, 64])]; tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = coreml_update_state_55)[name = string("op_1288_cast_fp16")]; tensor tile_7 = const()[name = string("tile_7"), val = tensor([1, 1])]; int32 var_1291_axis_0 = const()[name = string("op_1291_axis_0"), val = int32(1)]; tensor var_1291_cast_fp16_0, tensor var_1291_cast_fp16_1 = split(axis = var_1291_axis_0, split_sizes = tile_7, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_split_sizes_0 = const()[name = string("op_1294_split_sizes_0"), val = tensor([8, 8])]; int32 var_1294_axis_0 = const()[name = string("op_1294_axis_0"), val = int32(1)]; tensor var_1294_cast_fp16_0, tensor var_1294_cast_fp16_1 = split(axis = var_1294_axis_0, split_sizes = var_1294_split_sizes_0, x = query_states_15_cast_fp16)[name = string("op_1294_cast_fp16")]; bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = var_1281_cast_fp16_0, y = var_1294_cast_fp16_0)[name = string("attn_weights_49_cast_fp16")]; fp16 _inversed_attn_weights_51_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_51_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_51_cast_fp16 = mul(x = attn_weights_49_cast_fp16, y = _inversed_attn_weights_51_y_0_to_fp16)[name = string("_inversed_attn_weights_51_cast_fp16")]; tensor attn_weights_53_cast_fp16 = add(x = _inversed_attn_weights_51_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; int32 var_1301 = const()[name = string("op_1301"), val = int32(2)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_1301, x = attn_weights_53_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool var_1307_transpose_x_1 = const()[name = string("op_1307_transpose_x_1"), val = bool(true)]; bool var_1307_transpose_y_1 = const()[name = string("op_1307_transpose_y_1"), val = bool(false)]; tensor var_1307_cast_fp16 = matmul(transpose_x = var_1307_transpose_x_1, transpose_y = var_1307_transpose_y_1, x = attn_weights_55_cast_fp16, y = var_1291_cast_fp16_0)[name = string("op_1307_cast_fp16")]; bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = attn_weights_57_transpose_y_0, x = var_1281_cast_fp16_1, y = var_1294_cast_fp16_1)[name = string("attn_weights_57_cast_fp16")]; fp16 _inversed_attn_weights_59_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_59_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_59_cast_fp16 = mul(x = attn_weights_57_cast_fp16, y = _inversed_attn_weights_59_y_0_to_fp16)[name = string("_inversed_attn_weights_59_cast_fp16")]; tensor attn_weights_61_cast_fp16 = add(x = _inversed_attn_weights_59_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; int32 var_1313 = const()[name = string("op_1313"), val = int32(2)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_1313, x = attn_weights_61_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_19_transpose_x_1 = const()[name = string("attn_output_19_transpose_x_1"), val = bool(true)]; bool attn_output_19_transpose_y_1 = const()[name = string("attn_output_19_transpose_y_1"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_1, transpose_y = attn_output_19_transpose_y_1, x = attn_weights_63_cast_fp16, y = var_1291_cast_fp16_1)[name = string("attn_output_19_cast_fp16")]; int32 var_1321 = const()[name = string("op_1321"), val = int32(1)]; bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = concat(axis = var_1321, interleave = attn_output_21_interleave_0, values = (var_1307_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")]; tensor var_1325_perm_0 = const()[name = string("op_1325_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 1024, 1, 8])]; tensor var_1325_cast_fp16 = transpose(perm = var_1325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_60")]; tensor x_59_cast_fp16 = reshape(shape = var_1330, x = var_1325_cast_fp16)[name = string("x_59_cast_fp16")]; string hidden_states_21_pad_type_0 = const()[name = string("hidden_states_21_pad_type_0"), val = string("valid")]; tensor hidden_states_21_strides_0 = const()[name = string("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = string("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = string("hidden_states_21_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_21_groups_0 = const()[name = string("hidden_states_21_groups_0"), val = int32(1)]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100706560)))]; tensor hidden_states_21_cast_fp16 = conv(dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = var_1337_to_fp16, x = x_59_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor x_61_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("x_61_cast_fp16")]; int32 var_1349 = const()[name = string("op_1349"), val = int32(1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1352_cast_fp16 = mul(x = x_61_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_1352_cast_fp16")]; bool x_63_interleave_0 = const()[name = string("x_63_interleave_0"), val = bool(false)]; tensor x_63_cast_fp16 = concat(axis = var_1349, interleave = x_63_interleave_0, values = (x_61_cast_fp16, var_1352_cast_fp16))[name = string("x_63_cast_fp16")]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1362_to_fp16, x = x_63_cast_fp16)[name = string("out_43_cast_fp16")]; tensor layer_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102803776)))]; tensor out_45_cast_fp16 = mul(x = out_43_cast_fp16, y = layer_layers_3_post_attention_layernorm_weight_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1368_split_sizes_0 = const()[name = string("op_1368_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1368_axis_0 = const()[name = string("op_1368_axis_0"), val = int32(1)]; tensor var_1368_cast_fp16_0, tensor var_1368_cast_fp16_1 = split(axis = var_1368_axis_0, split_sizes = var_1368_split_sizes_0, x = out_45_cast_fp16)[name = string("op_1368_cast_fp16")]; string input_7_pad_type_0 = const()[name = string("input_7_pad_type_0"), val = string("valid")]; tensor input_7_strides_0 = const()[name = string("input_7_strides_0"), val = tensor([1, 1])]; tensor input_7_pad_0 = const()[name = string("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_7_dilations_0 = const()[name = string("input_7_dilations_0"), val = tensor([1, 1])]; int32 input_7_groups_0 = const()[name = string("input_7_groups_0"), val = int32(1)]; tensor var_1373_to_fp16 = const()[name = string("op_1373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807936)))]; tensor input_7_cast_fp16 = conv(dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = var_1373_to_fp16, x = var_1368_cast_fp16_0)[name = string("input_7_cast_fp16")]; tensor var_1384_cast_fp16 = silu(x = input_7_cast_fp16)[name = string("op_1384_cast_fp16")]; string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")]; tensor var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor([1, 1])]; tensor var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor([1, 1])]; int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)]; tensor var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196608)))]; tensor var_1389_cast_fp16 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = var_1372_to_fp16, x = var_1368_cast_fp16_0)[name = string("op_1389_cast_fp16")]; tensor x_69_cast_fp16 = mul(x = var_1384_cast_fp16, y = var_1389_cast_fp16)[name = string("x_69_cast_fp16")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor var_1371_to_fp16 = const()[name = string("op_1371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119585280)))]; tensor hidden_states_23_cast_fp16 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = var_1371_to_fp16, x = x_69_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_61_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("x_71_cast_fp16")]; int32 var_1402 = const()[name = string("op_1402"), val = int32(1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1405_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_1405_cast_fp16")]; bool x_73_interleave_0 = const()[name = string("x_73_interleave_0"), val = bool(false)]; tensor x_73_cast_fp16 = concat(axis = var_1402, interleave = x_73_interleave_0, values = (x_71_cast_fp16, var_1405_cast_fp16))[name = string("x_73_cast_fp16")]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1415_to_fp16, x = x_73_cast_fp16)[name = string("out_49_cast_fp16")]; tensor layer_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127973952)))]; tensor out_51_cast_fp16 = mul(x = out_49_cast_fp16, y = layer_layers_4_input_layernorm_weight_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_1421_split_sizes_0 = const()[name = string("op_1421_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1421_axis_0 = const()[name = string("op_1421_axis_0"), val = int32(1)]; tensor var_1421_cast_fp16_0, tensor var_1421_cast_fp16_1 = split(axis = var_1421_axis_0, split_sizes = var_1421_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1421_cast_fp16")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127978112)))]; tensor query_states_17_cast_fp16 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = var_1443_to_fp16, x = var_1421_cast_fp16_0)[name = string("query_states_17_cast_fp16")]; string key_states_17_pad_type_0 = const()[name = string("key_states_17_pad_type_0"), val = string("valid")]; tensor key_states_17_strides_0 = const()[name = string("key_states_17_strides_0"), val = tensor([1, 1])]; tensor key_states_17_pad_0 = const()[name = string("key_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_17_dilations_0 = const()[name = string("key_states_17_dilations_0"), val = tensor([1, 1])]; int32 key_states_17_groups_0 = const()[name = string("key_states_17_groups_0"), val = int32(1)]; tensor var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130075328)))]; tensor key_states_17_cast_fp16 = conv(dilations = key_states_17_dilations_0, groups = key_states_17_groups_0, pad = key_states_17_pad_0, pad_type = key_states_17_pad_type_0, strides = key_states_17_strides_0, weight = var_1454_to_fp16, x = var_1421_cast_fp16_0)[name = string("key_states_17_cast_fp16")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130337536)))]; tensor value_states_17_cast_fp16 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = var_1465_to_fp16, x = var_1421_cast_fp16_0)[name = string("value_states_17_cast_fp16")]; tensor var_1473 = const()[name = string("op_1473"), val = tensor([1, 16, 64, 8])]; tensor embed_17_cast_fp16 = reshape(shape = var_1473, x = query_states_17_cast_fp16)[name = string("embed_17_cast_fp16")]; tensor var_1477 = const()[name = string("op_1477"), val = tensor([1, 2, 64, 8])]; tensor var_1478_cast_fp16 = reshape(shape = var_1477, x = key_states_17_cast_fp16)[name = string("op_1478_cast_fp16")]; tensor embed_19_perm_0 = const()[name = string("embed_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1484 = const()[name = string("op_1484"), val = tensor([1, 2, 64, 8])]; tensor var_1485_cast_fp16 = reshape(shape = var_1484, x = value_states_17_cast_fp16)[name = string("op_1485_cast_fp16")]; tensor value_states_19_perm_0 = const()[name = string("value_states_19_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1489_cast_fp16 = mul(x = embed_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor var_1490_split_sizes_0 = const()[name = string("op_1490_split_sizes_0"), val = tensor([32, 32])]; int32 var_1490_axis_0 = const()[name = string("op_1490_axis_0"), val = int32(-2)]; tensor var_1490_cast_fp16_0, tensor var_1490_cast_fp16_1 = split(axis = var_1490_axis_0, split_sizes = var_1490_split_sizes_0, x = embed_17_cast_fp16)[name = string("op_1490_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1492_cast_fp16 = mul(x = var_1490_cast_fp16_1, y = const_47_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-2)]; bool var_1495_interleave_0 = const()[name = string("op_1495_interleave_0"), val = bool(false)]; tensor var_1495_cast_fp16 = concat(axis = var_1494, interleave = var_1495_interleave_0, values = (var_1492_cast_fp16, var_1490_cast_fp16_0))[name = string("op_1495_cast_fp16")]; tensor var_1496_cast_fp16 = mul(x = var_1495_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1496_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor embed_19_cast_fp16 = transpose(perm = embed_19_perm_0, x = var_1478_cast_fp16)[name = string("transpose_59")]; tensor var_1499_cast_fp16 = mul(x = embed_19_cast_fp16, y = cos_cast_fp16)[name = string("op_1499_cast_fp16")]; tensor var_1500_split_sizes_0 = const()[name = string("op_1500_split_sizes_0"), val = tensor([32, 32])]; int32 var_1500_axis_0 = const()[name = string("op_1500_axis_0"), val = int32(-1)]; tensor var_1500_cast_fp16_0, tensor var_1500_cast_fp16_1 = split(axis = var_1500_axis_0, split_sizes = var_1500_split_sizes_0, x = embed_19_cast_fp16)[name = string("op_1500_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1502_cast_fp16 = mul(x = var_1500_cast_fp16_1, y = const_48_promoted_to_fp16)[name = string("op_1502_cast_fp16")]; int32 var_1504 = const()[name = string("op_1504"), val = int32(-1)]; bool var_1505_interleave_0 = const()[name = string("op_1505_interleave_0"), val = bool(false)]; tensor var_1505_cast_fp16 = concat(axis = var_1504, interleave = var_1505_interleave_0, values = (var_1502_cast_fp16, var_1500_cast_fp16_0))[name = string("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = mul(x = var_1505_cast_fp16, y = sin_cast_fp16)[name = string("op_1506_cast_fp16")]; tensor key_states_19_cast_fp16 = add(x = var_1499_cast_fp16, y = var_1506_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor expand_dims_41 = const()[name = string("expand_dims_41"), val = tensor([4])]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([0])]; tensor expand_dims_44 = const()[name = string("expand_dims_44"), val = tensor([5])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_41, expand_dims_42, position_id, concat_35_values3_0))[name = string("concat_35")]; tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (expand_dims_44, concat_36_values1_0, var_426, concat_36_values3_0))[name = string("concat_36")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = key_states_19_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_222_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_222")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19_cast_fp16 = transpose(perm = value_states_19_perm_0, x = var_1485_cast_fp16)[name = string("transpose_58")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_36, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = value_states_19_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_223_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_223")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1549_end_0 = const()[name = string("op_1549_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = coreml_update_state_56)[name = string("op_1549_cast_fp16")]; tensor tile_8 = const()[name = string("tile_8"), val = tensor([1, 1])]; int32 var_1552_axis_0 = const()[name = string("op_1552_axis_0"), val = int32(1)]; tensor var_1552_cast_fp16_0, tensor var_1552_cast_fp16_1 = split(axis = var_1552_axis_0, split_sizes = tile_8, x = var_1549_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = string("op_1559_end_0"), val = tensor([5, 2, 2048, 64])]; tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, x = coreml_update_state_57)[name = string("op_1559_cast_fp16")]; tensor tile_9 = const()[name = string("tile_9"), val = tensor([1, 1])]; int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(1)]; tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = tile_9, x = var_1559_cast_fp16)[name = string("op_1562_cast_fp16")]; tensor var_1565_split_sizes_0 = const()[name = string("op_1565_split_sizes_0"), val = tensor([8, 8])]; int32 var_1565_axis_0 = const()[name = string("op_1565_axis_0"), val = int32(1)]; tensor var_1565_cast_fp16_0, tensor var_1565_cast_fp16_1 = split(axis = var_1565_axis_0, split_sizes = var_1565_split_sizes_0, x = query_states_19_cast_fp16)[name = string("op_1565_cast_fp16")]; bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = var_1552_cast_fp16_0, y = var_1565_cast_fp16_0)[name = string("attn_weights_65_cast_fp16")]; fp16 _inversed_attn_weights_67_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_67_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_67_cast_fp16 = mul(x = attn_weights_65_cast_fp16, y = _inversed_attn_weights_67_y_0_to_fp16)[name = string("_inversed_attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = _inversed_attn_weights_67_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; int32 var_1572 = const()[name = string("op_1572"), val = int32(2)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_1572, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(true)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(false)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = attn_weights_71_cast_fp16, y = var_1562_cast_fp16_0)[name = string("op_1578_cast_fp16")]; bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = var_1552_cast_fp16_1, y = var_1565_cast_fp16_1)[name = string("attn_weights_73_cast_fp16")]; fp16 _inversed_attn_weights_75_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_75_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_75_cast_fp16 = mul(x = attn_weights_73_cast_fp16, y = _inversed_attn_weights_75_y_0_to_fp16)[name = string("_inversed_attn_weights_75_cast_fp16")]; tensor attn_weights_77_cast_fp16 = add(x = _inversed_attn_weights_75_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; int32 var_1584 = const()[name = string("op_1584"), val = int32(2)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_1584, x = attn_weights_77_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(true)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_79_cast_fp16, y = var_1562_cast_fp16_1)[name = string("attn_output_25_cast_fp16")]; int32 var_1592 = const()[name = string("op_1592"), val = int32(1)]; bool attn_output_27_interleave_0 = const()[name = string("attn_output_27_interleave_0"), val = bool(false)]; tensor attn_output_27_cast_fp16 = concat(axis = var_1592, interleave = attn_output_27_interleave_0, values = (var_1578_cast_fp16, attn_output_25_cast_fp16))[name = string("attn_output_27_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1024, 1, 8])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_27_cast_fp16)[name = string("transpose_57")]; tensor x_77_cast_fp16 = reshape(shape = var_1601, x = var_1596_cast_fp16)[name = string("x_77_cast_fp16")]; string hidden_states_27_pad_type_0 = const()[name = string("hidden_states_27_pad_type_0"), val = string("valid")]; tensor hidden_states_27_strides_0 = const()[name = string("hidden_states_27_strides_0"), val = tensor([1, 1])]; tensor hidden_states_27_pad_0 = const()[name = string("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_27_dilations_0 = const()[name = string("hidden_states_27_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_27_groups_0 = const()[name = string("hidden_states_27_groups_0"), val = int32(1)]; tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130599744)))]; tensor hidden_states_27_cast_fp16 = conv(dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = var_1608_to_fp16, x = x_77_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor x_79_cast_fp16 = add(x = x_71_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("x_79_cast_fp16")]; int32 var_1620 = const()[name = string("op_1620"), val = int32(1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1623_cast_fp16 = mul(x = x_79_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_1623_cast_fp16")]; bool x_81_interleave_0 = const()[name = string("x_81_interleave_0"), val = bool(false)]; tensor x_81_cast_fp16 = concat(axis = var_1620, interleave = x_81_interleave_0, values = (x_79_cast_fp16, var_1623_cast_fp16))[name = string("x_81_cast_fp16")]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1633_to_fp16, x = x_81_cast_fp16)[name = string("out_55_cast_fp16")]; tensor layer_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132696960)))]; tensor out_57_cast_fp16 = mul(x = out_55_cast_fp16, y = layer_layers_4_post_attention_layernorm_weight_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1639_split_sizes_0 = const()[name = string("op_1639_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1639_axis_0 = const()[name = string("op_1639_axis_0"), val = int32(1)]; tensor var_1639_cast_fp16_0, tensor var_1639_cast_fp16_1 = split(axis = var_1639_axis_0, split_sizes = var_1639_split_sizes_0, x = out_57_cast_fp16)[name = string("op_1639_cast_fp16")]; string input_9_pad_type_0 = const()[name = string("input_9_pad_type_0"), val = string("valid")]; tensor input_9_strides_0 = const()[name = string("input_9_strides_0"), val = tensor([1, 1])]; tensor input_9_pad_0 = const()[name = string("input_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_9_dilations_0 = const()[name = string("input_9_dilations_0"), val = tensor([1, 1])]; int32 input_9_groups_0 = const()[name = string("input_9_groups_0"), val = int32(1)]; tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132701120)))]; tensor input_9_cast_fp16 = conv(dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = var_1644_to_fp16, x = var_1639_cast_fp16_0)[name = string("input_9_cast_fp16")]; tensor var_1655_cast_fp16 = silu(x = input_9_cast_fp16)[name = string("op_1655_cast_fp16")]; string var_1660_pad_type_0 = const()[name = string("op_1660_pad_type_0"), val = string("valid")]; tensor var_1660_strides_0 = const()[name = string("op_1660_strides_0"), val = tensor([1, 1])]; tensor var_1660_pad_0 = const()[name = string("op_1660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1660_dilations_0 = const()[name = string("op_1660_dilations_0"), val = tensor([1, 1])]; int32 var_1660_groups_0 = const()[name = string("op_1660_groups_0"), val = int32(1)]; tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141089792)))]; tensor var_1660_cast_fp16 = conv(dilations = var_1660_dilations_0, groups = var_1660_groups_0, pad = var_1660_pad_0, pad_type = var_1660_pad_type_0, strides = var_1660_strides_0, weight = var_1643_to_fp16, x = var_1639_cast_fp16_0)[name = string("op_1660_cast_fp16")]; tensor x_87_cast_fp16 = mul(x = var_1655_cast_fp16, y = var_1660_cast_fp16)[name = string("x_87_cast_fp16")]; string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; tensor var_1642_to_fp16 = const()[name = string("op_1642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149478464)))]; tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = var_1642_to_fp16, x = x_87_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("x_89_cast_fp16")]; int32 var_1673 = const()[name = string("op_1673"), val = int32(1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1676_cast_fp16 = mul(x = x_89_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_1676_cast_fp16")]; bool x_91_interleave_0 = const()[name = string("x_91_interleave_0"), val = bool(false)]; tensor x_91_cast_fp16 = concat(axis = var_1673, interleave = x_91_interleave_0, values = (x_89_cast_fp16, var_1676_cast_fp16))[name = string("x_91_cast_fp16")]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1686_to_fp16, x = x_91_cast_fp16)[name = string("out_61_cast_fp16")]; tensor layer_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157867136)))]; tensor out_63_cast_fp16 = mul(x = out_61_cast_fp16, y = layer_layers_5_input_layernorm_weight_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_1692_split_sizes_0 = const()[name = string("op_1692_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1692_axis_0 = const()[name = string("op_1692_axis_0"), val = int32(1)]; tensor var_1692_cast_fp16_0, tensor var_1692_cast_fp16_1 = split(axis = var_1692_axis_0, split_sizes = var_1692_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1692_cast_fp16")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor var_1714_to_fp16 = const()[name = string("op_1714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157871296)))]; tensor query_states_21_cast_fp16 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = var_1714_to_fp16, x = var_1692_cast_fp16_0)[name = string("query_states_21_cast_fp16")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159968512)))]; tensor key_states_21_cast_fp16 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = var_1725_to_fp16, x = var_1692_cast_fp16_0)[name = string("key_states_21_cast_fp16")]; string value_states_21_pad_type_0 = const()[name = string("value_states_21_pad_type_0"), val = string("valid")]; tensor value_states_21_strides_0 = const()[name = string("value_states_21_strides_0"), val = tensor([1, 1])]; tensor value_states_21_pad_0 = const()[name = string("value_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_21_dilations_0 = const()[name = string("value_states_21_dilations_0"), val = tensor([1, 1])]; int32 value_states_21_groups_0 = const()[name = string("value_states_21_groups_0"), val = int32(1)]; tensor var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720)))]; tensor value_states_21_cast_fp16 = conv(dilations = value_states_21_dilations_0, groups = value_states_21_groups_0, pad = value_states_21_pad_0, pad_type = value_states_21_pad_type_0, strides = value_states_21_strides_0, weight = var_1736_to_fp16, x = var_1692_cast_fp16_0)[name = string("value_states_21_cast_fp16")]; tensor var_1744 = const()[name = string("op_1744"), val = tensor([1, 16, 64, 8])]; tensor embed_21_cast_fp16 = reshape(shape = var_1744, x = query_states_21_cast_fp16)[name = string("embed_21_cast_fp16")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([1, 2, 64, 8])]; tensor var_1749_cast_fp16 = reshape(shape = var_1748, x = key_states_21_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor embed_23_perm_0 = const()[name = string("embed_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 2, 64, 8])]; tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_states_21_cast_fp16)[name = string("op_1756_cast_fp16")]; tensor value_states_23_perm_0 = const()[name = string("value_states_23_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1760_cast_fp16 = mul(x = embed_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([32, 32])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-2)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = embed_21_cast_fp16)[name = string("op_1761_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1763_cast_fp16 = mul(x = var_1761_cast_fp16_1, y = const_57_promoted_to_fp16)[name = string("op_1763_cast_fp16")]; int32 var_1765 = const()[name = string("op_1765"), val = int32(-2)]; bool var_1766_interleave_0 = const()[name = string("op_1766_interleave_0"), val = bool(false)]; tensor var_1766_cast_fp16 = concat(axis = var_1765, interleave = var_1766_interleave_0, values = (var_1763_cast_fp16, var_1761_cast_fp16_0))[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = var_1766_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1760_cast_fp16, y = var_1767_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor embed_23_cast_fp16 = transpose(perm = embed_23_perm_0, x = var_1749_cast_fp16)[name = string("transpose_56")]; tensor var_1770_cast_fp16 = mul(x = embed_23_cast_fp16, y = cos_cast_fp16)[name = string("op_1770_cast_fp16")]; tensor var_1771_split_sizes_0 = const()[name = string("op_1771_split_sizes_0"), val = tensor([32, 32])]; int32 var_1771_axis_0 = const()[name = string("op_1771_axis_0"), val = int32(-1)]; tensor var_1771_cast_fp16_0, tensor var_1771_cast_fp16_1 = split(axis = var_1771_axis_0, split_sizes = var_1771_split_sizes_0, x = embed_23_cast_fp16)[name = string("op_1771_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1773_cast_fp16 = mul(x = var_1771_cast_fp16_1, y = const_58_promoted_to_fp16)[name = string("op_1773_cast_fp16")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool var_1776_interleave_0 = const()[name = string("op_1776_interleave_0"), val = bool(false)]; tensor var_1776_cast_fp16 = concat(axis = var_1775, interleave = var_1776_interleave_0, values = (var_1773_cast_fp16, var_1771_cast_fp16_0))[name = string("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = mul(x = var_1776_cast_fp16, y = sin_cast_fp16)[name = string("op_1777_cast_fp16")]; tensor key_states_23_cast_fp16 = add(x = var_1770_cast_fp16, y = var_1777_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([5])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([0])]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([6])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_51, expand_dims_52, position_id, concat_43_values3_0))[name = string("concat_43")]; tensor concat_44_values1_0 = const()[name = string("concat_44_values1_0"), val = tensor([0])]; tensor concat_44_values3_0 = const()[name = string("concat_44_values3_0"), val = tensor([0])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (expand_dims_54, concat_44_values1_0, var_426, concat_44_values3_0))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_224_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_224")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_23_cast_fp16 = transpose(perm = value_states_23_perm_0, x = var_1756_cast_fp16)[name = string("transpose_55")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_44, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = value_states_23_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_225_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_225")]; tensor var_1820_begin_0 = const()[name = string("op_1820_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1820_end_0 = const()[name = string("op_1820_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1820_end_mask_0 = const()[name = string("op_1820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1820_cast_fp16 = slice_by_index(begin = var_1820_begin_0, end = var_1820_end_0, end_mask = var_1820_end_mask_0, x = coreml_update_state_58)[name = string("op_1820_cast_fp16")]; tensor tile_10 = const()[name = string("tile_10"), val = tensor([1, 1])]; int32 var_1823_axis_0 = const()[name = string("op_1823_axis_0"), val = int32(1)]; tensor var_1823_cast_fp16_0, tensor var_1823_cast_fp16_1 = split(axis = var_1823_axis_0, split_sizes = tile_10, x = var_1820_cast_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([6, 2, 2048, 64])]; tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_59)[name = string("op_1830_cast_fp16")]; tensor tile_11 = const()[name = string("tile_11"), val = tensor([1, 1])]; int32 var_1833_axis_0 = const()[name = string("op_1833_axis_0"), val = int32(1)]; tensor var_1833_cast_fp16_0, tensor var_1833_cast_fp16_1 = split(axis = var_1833_axis_0, split_sizes = tile_11, x = var_1830_cast_fp16)[name = string("op_1833_cast_fp16")]; tensor var_1836_split_sizes_0 = const()[name = string("op_1836_split_sizes_0"), val = tensor([8, 8])]; int32 var_1836_axis_0 = const()[name = string("op_1836_axis_0"), val = int32(1)]; tensor var_1836_cast_fp16_0, tensor var_1836_cast_fp16_1 = split(axis = var_1836_axis_0, split_sizes = var_1836_split_sizes_0, x = query_states_23_cast_fp16)[name = string("op_1836_cast_fp16")]; bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = var_1823_cast_fp16_0, y = var_1836_cast_fp16_0)[name = string("attn_weights_81_cast_fp16")]; fp16 _inversed_attn_weights_83_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_83_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_83_cast_fp16 = mul(x = attn_weights_81_cast_fp16, y = _inversed_attn_weights_83_y_0_to_fp16)[name = string("_inversed_attn_weights_83_cast_fp16")]; tensor attn_weights_85_cast_fp16 = add(x = _inversed_attn_weights_83_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_85_cast_fp16")]; int32 var_1843 = const()[name = string("op_1843"), val = int32(2)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_1843, x = attn_weights_85_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool var_1849_transpose_x_1 = const()[name = string("op_1849_transpose_x_1"), val = bool(true)]; bool var_1849_transpose_y_1 = const()[name = string("op_1849_transpose_y_1"), val = bool(false)]; tensor var_1849_cast_fp16 = matmul(transpose_x = var_1849_transpose_x_1, transpose_y = var_1849_transpose_y_1, x = attn_weights_87_cast_fp16, y = var_1833_cast_fp16_0)[name = string("op_1849_cast_fp16")]; bool attn_weights_89_transpose_x_0 = const()[name = string("attn_weights_89_transpose_x_0"), val = bool(false)]; bool attn_weights_89_transpose_y_0 = const()[name = string("attn_weights_89_transpose_y_0"), val = bool(false)]; tensor attn_weights_89_cast_fp16 = matmul(transpose_x = attn_weights_89_transpose_x_0, transpose_y = attn_weights_89_transpose_y_0, x = var_1823_cast_fp16_1, y = var_1836_cast_fp16_1)[name = string("attn_weights_89_cast_fp16")]; fp16 _inversed_attn_weights_91_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_91_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_91_cast_fp16 = mul(x = attn_weights_89_cast_fp16, y = _inversed_attn_weights_91_y_0_to_fp16)[name = string("_inversed_attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = _inversed_attn_weights_91_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; int32 var_1855 = const()[name = string("op_1855"), val = int32(2)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_1855, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_31_transpose_x_1 = const()[name = string("attn_output_31_transpose_x_1"), val = bool(true)]; bool attn_output_31_transpose_y_1 = const()[name = string("attn_output_31_transpose_y_1"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_1, transpose_y = attn_output_31_transpose_y_1, x = attn_weights_95_cast_fp16, y = var_1833_cast_fp16_1)[name = string("attn_output_31_cast_fp16")]; int32 var_1863 = const()[name = string("op_1863"), val = int32(1)]; bool attn_output_33_interleave_0 = const()[name = string("attn_output_33_interleave_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = concat(axis = var_1863, interleave = attn_output_33_interleave_0, values = (var_1849_cast_fp16, attn_output_31_cast_fp16))[name = string("attn_output_33_cast_fp16")]; tensor var_1867_perm_0 = const()[name = string("op_1867_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, 1024, 1, 8])]; tensor var_1867_cast_fp16 = transpose(perm = var_1867_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_54")]; tensor x_95_cast_fp16 = reshape(shape = var_1872, x = var_1867_cast_fp16)[name = string("x_95_cast_fp16")]; string hidden_states_33_pad_type_0 = const()[name = string("hidden_states_33_pad_type_0"), val = string("valid")]; tensor hidden_states_33_strides_0 = const()[name = string("hidden_states_33_strides_0"), val = tensor([1, 1])]; tensor hidden_states_33_pad_0 = const()[name = string("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_33_dilations_0 = const()[name = string("hidden_states_33_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_33_groups_0 = const()[name = string("hidden_states_33_groups_0"), val = int32(1)]; tensor var_1879_to_fp16 = const()[name = string("op_1879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160492928)))]; tensor hidden_states_33_cast_fp16 = conv(dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = var_1879_to_fp16, x = x_95_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_1891 = const()[name = string("op_1891"), val = int32(1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x_97_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; bool x_99_interleave_0 = const()[name = string("x_99_interleave_0"), val = bool(false)]; tensor x_99_cast_fp16 = concat(axis = var_1891, interleave = x_99_interleave_0, values = (x_97_cast_fp16, var_1894_cast_fp16))[name = string("x_99_cast_fp16")]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_1904_to_fp16, x = x_99_cast_fp16)[name = string("out_67_cast_fp16")]; tensor layer_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162590144)))]; tensor out_69_cast_fp16 = mul(x = out_67_cast_fp16, y = layer_layers_5_post_attention_layernorm_weight_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1910_split_sizes_0 = const()[name = string("op_1910_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1910_axis_0 = const()[name = string("op_1910_axis_0"), val = int32(1)]; tensor var_1910_cast_fp16_0, tensor var_1910_cast_fp16_1 = split(axis = var_1910_axis_0, split_sizes = var_1910_split_sizes_0, x = out_69_cast_fp16)[name = string("op_1910_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162594304)))]; tensor input_11_cast_fp16 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = var_1915_to_fp16, x = var_1910_cast_fp16_0)[name = string("input_11_cast_fp16")]; tensor var_1926_cast_fp16 = silu(x = input_11_cast_fp16)[name = string("op_1926_cast_fp16")]; string var_1931_pad_type_0 = const()[name = string("op_1931_pad_type_0"), val = string("valid")]; tensor var_1931_strides_0 = const()[name = string("op_1931_strides_0"), val = tensor([1, 1])]; tensor var_1931_pad_0 = const()[name = string("op_1931_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1931_dilations_0 = const()[name = string("op_1931_dilations_0"), val = tensor([1, 1])]; int32 var_1931_groups_0 = const()[name = string("op_1931_groups_0"), val = int32(1)]; tensor var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170982976)))]; tensor var_1931_cast_fp16 = conv(dilations = var_1931_dilations_0, groups = var_1931_groups_0, pad = var_1931_pad_0, pad_type = var_1931_pad_type_0, strides = var_1931_strides_0, weight = var_1914_to_fp16, x = var_1910_cast_fp16_0)[name = string("op_1931_cast_fp16")]; tensor x_105_cast_fp16 = mul(x = var_1926_cast_fp16, y = var_1931_cast_fp16)[name = string("x_105_cast_fp16")]; string hidden_states_35_pad_type_0 = const()[name = string("hidden_states_35_pad_type_0"), val = string("valid")]; tensor hidden_states_35_strides_0 = const()[name = string("hidden_states_35_strides_0"), val = tensor([1, 1])]; tensor hidden_states_35_pad_0 = const()[name = string("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_35_dilations_0 = const()[name = string("hidden_states_35_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_35_groups_0 = const()[name = string("hidden_states_35_groups_0"), val = int32(1)]; tensor var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179371648)))]; tensor hidden_states_35_cast_fp16 = conv(dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = var_1913_to_fp16, x = x_105_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1947_cast_fp16 = mul(x = x_107_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_1947_cast_fp16")]; bool x_109_interleave_0 = const()[name = string("x_109_interleave_0"), val = bool(false)]; tensor x_109_cast_fp16 = concat(axis = var_1944, interleave = x_109_interleave_0, values = (x_107_cast_fp16, var_1947_cast_fp16))[name = string("x_109_cast_fp16")]; tensor out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor([1])]; fp16 var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_1957_to_fp16, x = x_109_cast_fp16)[name = string("out_73_cast_fp16")]; tensor layer_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187760320)))]; tensor out_75_cast_fp16 = mul(x = out_73_cast_fp16, y = layer_layers_6_input_layernorm_weight_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_1963_split_sizes_0 = const()[name = string("op_1963_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1963_axis_0 = const()[name = string("op_1963_axis_0"), val = int32(1)]; tensor var_1963_cast_fp16_0, tensor var_1963_cast_fp16_1 = split(axis = var_1963_axis_0, split_sizes = var_1963_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1963_cast_fp16")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187764480)))]; tensor query_states_25_cast_fp16 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = var_1985_to_fp16, x = var_1963_cast_fp16_0)[name = string("query_states_25_cast_fp16")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor var_1996_to_fp16 = const()[name = string("op_1996_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189861696)))]; tensor key_states_25_cast_fp16 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = var_1996_to_fp16, x = var_1963_cast_fp16_0)[name = string("key_states_25_cast_fp16")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190123904)))]; tensor value_states_25_cast_fp16 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = var_2007_to_fp16, x = var_1963_cast_fp16_0)[name = string("value_states_25_cast_fp16")]; tensor var_2015 = const()[name = string("op_2015"), val = tensor([1, 16, 64, 8])]; tensor embed_25_cast_fp16 = reshape(shape = var_2015, x = query_states_25_cast_fp16)[name = string("embed_25_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 2, 64, 8])]; tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = key_states_25_cast_fp16)[name = string("op_2020_cast_fp16")]; tensor embed_27_perm_0 = const()[name = string("embed_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2026 = const()[name = string("op_2026"), val = tensor([1, 2, 64, 8])]; tensor var_2027_cast_fp16 = reshape(shape = var_2026, x = value_states_25_cast_fp16)[name = string("op_2027_cast_fp16")]; tensor value_states_27_perm_0 = const()[name = string("value_states_27_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2031_cast_fp16 = mul(x = embed_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2031_cast_fp16")]; tensor var_2032_split_sizes_0 = const()[name = string("op_2032_split_sizes_0"), val = tensor([32, 32])]; int32 var_2032_axis_0 = const()[name = string("op_2032_axis_0"), val = int32(-2)]; tensor var_2032_cast_fp16_0, tensor var_2032_cast_fp16_1 = split(axis = var_2032_axis_0, split_sizes = var_2032_split_sizes_0, x = embed_25_cast_fp16)[name = string("op_2032_cast_fp16")]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2034_cast_fp16 = mul(x = var_2032_cast_fp16_1, y = const_67_promoted_to_fp16)[name = string("op_2034_cast_fp16")]; int32 var_2036 = const()[name = string("op_2036"), val = int32(-2)]; bool var_2037_interleave_0 = const()[name = string("op_2037_interleave_0"), val = bool(false)]; tensor var_2037_cast_fp16 = concat(axis = var_2036, interleave = var_2037_interleave_0, values = (var_2034_cast_fp16, var_2032_cast_fp16_0))[name = string("op_2037_cast_fp16")]; tensor var_2038_cast_fp16 = mul(x = var_2037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2038_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor embed_27_cast_fp16 = transpose(perm = embed_27_perm_0, x = var_2020_cast_fp16)[name = string("transpose_53")]; tensor var_2041_cast_fp16 = mul(x = embed_27_cast_fp16, y = cos_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2042_split_sizes_0 = const()[name = string("op_2042_split_sizes_0"), val = tensor([32, 32])]; int32 var_2042_axis_0 = const()[name = string("op_2042_axis_0"), val = int32(-1)]; tensor var_2042_cast_fp16_0, tensor var_2042_cast_fp16_1 = split(axis = var_2042_axis_0, split_sizes = var_2042_split_sizes_0, x = embed_27_cast_fp16)[name = string("op_2042_cast_fp16")]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = var_2042_cast_fp16_1, y = const_68_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; bool var_2047_interleave_0 = const()[name = string("op_2047_interleave_0"), val = bool(false)]; tensor var_2047_cast_fp16 = concat(axis = var_2046, interleave = var_2047_interleave_0, values = (var_2044_cast_fp16, var_2042_cast_fp16_0))[name = string("op_2047_cast_fp16")]; tensor var_2048_cast_fp16 = mul(x = var_2047_cast_fp16, y = sin_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor key_states_27_cast_fp16 = add(x = var_2041_cast_fp16, y = var_2048_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([6])]; tensor expand_dims_62 = const()[name = string("expand_dims_62"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([7])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_61, expand_dims_62, position_id, concat_51_values3_0))[name = string("concat_51")]; tensor concat_52_values1_0 = const()[name = string("concat_52_values1_0"), val = tensor([0])]; tensor concat_52_values3_0 = const()[name = string("concat_52_values3_0"), val = tensor([0])]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (expand_dims_64, concat_52_values1_0, var_426, concat_52_values3_0))[name = string("concat_52")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = key_states_27_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_226_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_226")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27_cast_fp16 = transpose(perm = value_states_27_perm_0, x = var_2027_cast_fp16)[name = string("transpose_52")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_51, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_52, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = value_states_27_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_227_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_227")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = coreml_update_state_60)[name = string("op_2091_cast_fp16")]; tensor tile_12 = const()[name = string("tile_12"), val = tensor([1, 1])]; int32 var_2094_axis_0 = const()[name = string("op_2094_axis_0"), val = int32(1)]; tensor var_2094_cast_fp16_0, tensor var_2094_cast_fp16_1 = split(axis = var_2094_axis_0, split_sizes = tile_12, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = string("op_2101_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = string("op_2101_end_0"), val = tensor([7, 2, 2048, 64])]; tensor var_2101_end_mask_0 = const()[name = string("op_2101_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = coreml_update_state_61)[name = string("op_2101_cast_fp16")]; tensor tile_13 = const()[name = string("tile_13"), val = tensor([1, 1])]; int32 var_2104_axis_0 = const()[name = string("op_2104_axis_0"), val = int32(1)]; tensor var_2104_cast_fp16_0, tensor var_2104_cast_fp16_1 = split(axis = var_2104_axis_0, split_sizes = tile_13, x = var_2101_cast_fp16)[name = string("op_2104_cast_fp16")]; tensor var_2107_split_sizes_0 = const()[name = string("op_2107_split_sizes_0"), val = tensor([8, 8])]; int32 var_2107_axis_0 = const()[name = string("op_2107_axis_0"), val = int32(1)]; tensor var_2107_cast_fp16_0, tensor var_2107_cast_fp16_1 = split(axis = var_2107_axis_0, split_sizes = var_2107_split_sizes_0, x = query_states_27_cast_fp16)[name = string("op_2107_cast_fp16")]; bool attn_weights_97_transpose_x_0 = const()[name = string("attn_weights_97_transpose_x_0"), val = bool(false)]; bool attn_weights_97_transpose_y_0 = const()[name = string("attn_weights_97_transpose_y_0"), val = bool(false)]; tensor attn_weights_97_cast_fp16 = matmul(transpose_x = attn_weights_97_transpose_x_0, transpose_y = attn_weights_97_transpose_y_0, x = var_2094_cast_fp16_0, y = var_2107_cast_fp16_0)[name = string("attn_weights_97_cast_fp16")]; fp16 _inversed_attn_weights_99_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_99_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_99_cast_fp16 = mul(x = attn_weights_97_cast_fp16, y = _inversed_attn_weights_99_y_0_to_fp16)[name = string("_inversed_attn_weights_99_cast_fp16")]; tensor attn_weights_101_cast_fp16 = add(x = _inversed_attn_weights_99_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; int32 var_2114 = const()[name = string("op_2114"), val = int32(2)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_2114, x = attn_weights_101_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool var_2120_transpose_x_1 = const()[name = string("op_2120_transpose_x_1"), val = bool(true)]; bool var_2120_transpose_y_1 = const()[name = string("op_2120_transpose_y_1"), val = bool(false)]; tensor var_2120_cast_fp16 = matmul(transpose_x = var_2120_transpose_x_1, transpose_y = var_2120_transpose_y_1, x = attn_weights_103_cast_fp16, y = var_2104_cast_fp16_0)[name = string("op_2120_cast_fp16")]; bool attn_weights_105_transpose_x_0 = const()[name = string("attn_weights_105_transpose_x_0"), val = bool(false)]; bool attn_weights_105_transpose_y_0 = const()[name = string("attn_weights_105_transpose_y_0"), val = bool(false)]; tensor attn_weights_105_cast_fp16 = matmul(transpose_x = attn_weights_105_transpose_x_0, transpose_y = attn_weights_105_transpose_y_0, x = var_2094_cast_fp16_1, y = var_2107_cast_fp16_1)[name = string("attn_weights_105_cast_fp16")]; fp16 _inversed_attn_weights_107_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_107_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_107_cast_fp16 = mul(x = attn_weights_105_cast_fp16, y = _inversed_attn_weights_107_y_0_to_fp16)[name = string("_inversed_attn_weights_107_cast_fp16")]; tensor attn_weights_109_cast_fp16 = add(x = _inversed_attn_weights_107_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_109_cast_fp16")]; int32 var_2126 = const()[name = string("op_2126"), val = int32(2)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_2126, x = attn_weights_109_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(true)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_111_cast_fp16, y = var_2104_cast_fp16_1)[name = string("attn_output_37_cast_fp16")]; int32 var_2134 = const()[name = string("op_2134"), val = int32(1)]; bool attn_output_39_interleave_0 = const()[name = string("attn_output_39_interleave_0"), val = bool(false)]; tensor attn_output_39_cast_fp16 = concat(axis = var_2134, interleave = attn_output_39_interleave_0, values = (var_2120_cast_fp16, attn_output_37_cast_fp16))[name = string("attn_output_39_cast_fp16")]; tensor var_2138_perm_0 = const()[name = string("op_2138_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1024, 1, 8])]; tensor var_2138_cast_fp16 = transpose(perm = var_2138_perm_0, x = attn_output_39_cast_fp16)[name = string("transpose_51")]; tensor x_113_cast_fp16 = reshape(shape = var_2143, x = var_2138_cast_fp16)[name = string("x_113_cast_fp16")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386112)))]; tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = var_2150_to_fp16, x = x_113_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor x_115_cast_fp16 = add(x = x_107_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("x_115_cast_fp16")]; int32 var_2162 = const()[name = string("op_2162"), val = int32(1)]; fp16 const_73_promoted_to_fp16 = const()[name = string("const_73_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2165_cast_fp16 = mul(x = x_115_cast_fp16, y = const_73_promoted_to_fp16)[name = string("op_2165_cast_fp16")]; bool x_117_interleave_0 = const()[name = string("x_117_interleave_0"), val = bool(false)]; tensor x_117_cast_fp16 = concat(axis = var_2162, interleave = x_117_interleave_0, values = (x_115_cast_fp16, var_2165_cast_fp16))[name = string("x_117_cast_fp16")]; tensor out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor([1])]; fp16 var_2175_to_fp16 = const()[name = string("op_2175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2175_to_fp16, x = x_117_cast_fp16)[name = string("out_79_cast_fp16")]; tensor layer_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192483328)))]; tensor out_81_cast_fp16 = mul(x = out_79_cast_fp16, y = layer_layers_6_post_attention_layernorm_weight_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_2181_split_sizes_0 = const()[name = string("op_2181_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2181_axis_0 = const()[name = string("op_2181_axis_0"), val = int32(1)]; tensor var_2181_cast_fp16_0, tensor var_2181_cast_fp16_1 = split(axis = var_2181_axis_0, split_sizes = var_2181_split_sizes_0, x = out_81_cast_fp16)[name = string("op_2181_cast_fp16")]; string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192487488)))]; tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = var_2186_to_fp16, x = var_2181_cast_fp16_0)[name = string("input_13_cast_fp16")]; tensor var_2197_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_2197_cast_fp16")]; string var_2202_pad_type_0 = const()[name = string("op_2202_pad_type_0"), val = string("valid")]; tensor var_2202_strides_0 = const()[name = string("op_2202_strides_0"), val = tensor([1, 1])]; tensor var_2202_pad_0 = const()[name = string("op_2202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2202_dilations_0 = const()[name = string("op_2202_dilations_0"), val = tensor([1, 1])]; int32 var_2202_groups_0 = const()[name = string("op_2202_groups_0"), val = int32(1)]; tensor var_2185_to_fp16 = const()[name = string("op_2185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200876160)))]; tensor var_2202_cast_fp16 = conv(dilations = var_2202_dilations_0, groups = var_2202_groups_0, pad = var_2202_pad_0, pad_type = var_2202_pad_type_0, strides = var_2202_strides_0, weight = var_2185_to_fp16, x = var_2181_cast_fp16_0)[name = string("op_2202_cast_fp16")]; tensor x_123_cast_fp16 = mul(x = var_2197_cast_fp16, y = var_2202_cast_fp16)[name = string("x_123_cast_fp16")]; string hidden_states_41_pad_type_0 = const()[name = string("hidden_states_41_pad_type_0"), val = string("valid")]; tensor hidden_states_41_strides_0 = const()[name = string("hidden_states_41_strides_0"), val = tensor([1, 1])]; tensor hidden_states_41_pad_0 = const()[name = string("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_41_dilations_0 = const()[name = string("hidden_states_41_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_41_groups_0 = const()[name = string("hidden_states_41_groups_0"), val = int32(1)]; tensor var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209264832)))]; tensor hidden_states_41_cast_fp16 = conv(dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = var_2184_to_fp16, x = x_123_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor x_125_cast_fp16 = add(x = x_115_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("x_125_cast_fp16")]; int32 var_2215 = const()[name = string("op_2215"), val = int32(1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_125_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool x_127_interleave_0 = const()[name = string("x_127_interleave_0"), val = bool(false)]; tensor x_127_cast_fp16 = concat(axis = var_2215, interleave = x_127_interleave_0, values = (x_125_cast_fp16, var_2218_cast_fp16))[name = string("x_127_cast_fp16")]; tensor out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor([1])]; fp16 var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2228_to_fp16, x = x_127_cast_fp16)[name = string("out_85_cast_fp16")]; tensor layer_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217653504)))]; tensor out_87_cast_fp16 = mul(x = out_85_cast_fp16, y = layer_layers_7_input_layernorm_weight_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_2234_split_sizes_0 = const()[name = string("op_2234_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2234_axis_0 = const()[name = string("op_2234_axis_0"), val = int32(1)]; tensor var_2234_cast_fp16_0, tensor var_2234_cast_fp16_1 = split(axis = var_2234_axis_0, split_sizes = var_2234_split_sizes_0, x = out_87_cast_fp16)[name = string("op_2234_cast_fp16")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217657664)))]; tensor query_states_29_cast_fp16 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = var_2256_to_fp16, x = var_2234_cast_fp16_0)[name = string("query_states_29_cast_fp16")]; string key_states_29_pad_type_0 = const()[name = string("key_states_29_pad_type_0"), val = string("valid")]; tensor key_states_29_strides_0 = const()[name = string("key_states_29_strides_0"), val = tensor([1, 1])]; tensor key_states_29_pad_0 = const()[name = string("key_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_29_dilations_0 = const()[name = string("key_states_29_dilations_0"), val = tensor([1, 1])]; int32 key_states_29_groups_0 = const()[name = string("key_states_29_groups_0"), val = int32(1)]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754880)))]; tensor key_states_29_cast_fp16 = conv(dilations = key_states_29_dilations_0, groups = key_states_29_groups_0, pad = key_states_29_pad_0, pad_type = key_states_29_pad_type_0, strides = key_states_29_strides_0, weight = var_2267_to_fp16, x = var_2234_cast_fp16_0)[name = string("key_states_29_cast_fp16")]; string value_states_29_pad_type_0 = const()[name = string("value_states_29_pad_type_0"), val = string("valid")]; tensor value_states_29_strides_0 = const()[name = string("value_states_29_strides_0"), val = tensor([1, 1])]; tensor value_states_29_pad_0 = const()[name = string("value_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_29_dilations_0 = const()[name = string("value_states_29_dilations_0"), val = tensor([1, 1])]; int32 value_states_29_groups_0 = const()[name = string("value_states_29_groups_0"), val = int32(1)]; tensor var_2278_to_fp16 = const()[name = string("op_2278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220017088)))]; tensor value_states_29_cast_fp16 = conv(dilations = value_states_29_dilations_0, groups = value_states_29_groups_0, pad = value_states_29_pad_0, pad_type = value_states_29_pad_type_0, strides = value_states_29_strides_0, weight = var_2278_to_fp16, x = var_2234_cast_fp16_0)[name = string("value_states_29_cast_fp16")]; tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 16, 64, 8])]; tensor embed_29_cast_fp16 = reshape(shape = var_2286, x = query_states_29_cast_fp16)[name = string("embed_29_cast_fp16")]; tensor var_2290 = const()[name = string("op_2290"), val = tensor([1, 2, 64, 8])]; tensor var_2291_cast_fp16 = reshape(shape = var_2290, x = key_states_29_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor embed_31_perm_0 = const()[name = string("embed_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([1, 2, 64, 8])]; tensor var_2298_cast_fp16 = reshape(shape = var_2297, x = value_states_29_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor value_states_31_perm_0 = const()[name = string("value_states_31_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_cast_fp16 = mul(x = embed_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2302_cast_fp16")]; tensor var_2303_split_sizes_0 = const()[name = string("op_2303_split_sizes_0"), val = tensor([32, 32])]; int32 var_2303_axis_0 = const()[name = string("op_2303_axis_0"), val = int32(-2)]; tensor var_2303_cast_fp16_0, tensor var_2303_cast_fp16_1 = split(axis = var_2303_axis_0, split_sizes = var_2303_split_sizes_0, x = embed_29_cast_fp16)[name = string("op_2303_cast_fp16")]; fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2305_cast_fp16 = mul(x = var_2303_cast_fp16_1, y = const_77_promoted_to_fp16)[name = string("op_2305_cast_fp16")]; int32 var_2307 = const()[name = string("op_2307"), val = int32(-2)]; bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; tensor var_2308_cast_fp16 = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (var_2305_cast_fp16, var_2303_cast_fp16_0))[name = string("op_2308_cast_fp16")]; tensor var_2309_cast_fp16 = mul(x = var_2308_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_2302_cast_fp16, y = var_2309_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor embed_31_cast_fp16 = transpose(perm = embed_31_perm_0, x = var_2291_cast_fp16)[name = string("transpose_50")]; tensor var_2312_cast_fp16 = mul(x = embed_31_cast_fp16, y = cos_cast_fp16)[name = string("op_2312_cast_fp16")]; tensor var_2313_split_sizes_0 = const()[name = string("op_2313_split_sizes_0"), val = tensor([32, 32])]; int32 var_2313_axis_0 = const()[name = string("op_2313_axis_0"), val = int32(-1)]; tensor var_2313_cast_fp16_0, tensor var_2313_cast_fp16_1 = split(axis = var_2313_axis_0, split_sizes = var_2313_split_sizes_0, x = embed_31_cast_fp16)[name = string("op_2313_cast_fp16")]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2315_cast_fp16 = mul(x = var_2313_cast_fp16_1, y = const_78_promoted_to_fp16)[name = string("op_2315_cast_fp16")]; int32 var_2317 = const()[name = string("op_2317"), val = int32(-1)]; bool var_2318_interleave_0 = const()[name = string("op_2318_interleave_0"), val = bool(false)]; tensor var_2318_cast_fp16 = concat(axis = var_2317, interleave = var_2318_interleave_0, values = (var_2315_cast_fp16, var_2313_cast_fp16_0))[name = string("op_2318_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = var_2318_cast_fp16, y = sin_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor key_states_31_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2319_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor expand_dims_71 = const()[name = string("expand_dims_71"), val = tensor([7])]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_74 = const()[name = string("expand_dims_74"), val = tensor([8])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_71, expand_dims_72, position_id, concat_59_values3_0))[name = string("concat_59")]; tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_74, concat_60_values1_0, var_426, concat_60_values3_0))[name = string("concat_60")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = key_states_31_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_228_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_228")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_31_cast_fp16 = transpose(perm = value_states_31_perm_0, x = var_2298_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_59, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_60, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = value_states_31_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_229_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_229")]; tensor var_2362_begin_0 = const()[name = string("op_2362_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2362_end_0 = const()[name = string("op_2362_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2362_end_mask_0 = const()[name = string("op_2362_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = coreml_update_state_62)[name = string("op_2362_cast_fp16")]; tensor tile_14 = const()[name = string("tile_14"), val = tensor([1, 1])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = tile_14, x = var_2362_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2372_begin_0 = const()[name = string("op_2372_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_2372_end_0 = const()[name = string("op_2372_end_0"), val = tensor([8, 2, 2048, 64])]; tensor var_2372_end_mask_0 = const()[name = string("op_2372_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2372_cast_fp16 = slice_by_index(begin = var_2372_begin_0, end = var_2372_end_0, end_mask = var_2372_end_mask_0, x = coreml_update_state_63)[name = string("op_2372_cast_fp16")]; tensor tile_15 = const()[name = string("tile_15"), val = tensor([1, 1])]; int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(1)]; tensor var_2375_cast_fp16_0, tensor var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = tile_15, x = var_2372_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2378_split_sizes_0 = const()[name = string("op_2378_split_sizes_0"), val = tensor([8, 8])]; int32 var_2378_axis_0 = const()[name = string("op_2378_axis_0"), val = int32(1)]; tensor var_2378_cast_fp16_0, tensor var_2378_cast_fp16_1 = split(axis = var_2378_axis_0, split_sizes = var_2378_split_sizes_0, x = query_states_31_cast_fp16)[name = string("op_2378_cast_fp16")]; bool attn_weights_113_transpose_x_0 = const()[name = string("attn_weights_113_transpose_x_0"), val = bool(false)]; bool attn_weights_113_transpose_y_0 = const()[name = string("attn_weights_113_transpose_y_0"), val = bool(false)]; tensor attn_weights_113_cast_fp16 = matmul(transpose_x = attn_weights_113_transpose_x_0, transpose_y = attn_weights_113_transpose_y_0, x = var_2365_cast_fp16_0, y = var_2378_cast_fp16_0)[name = string("attn_weights_113_cast_fp16")]; fp16 _inversed_attn_weights_115_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_115_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_115_cast_fp16 = mul(x = attn_weights_113_cast_fp16, y = _inversed_attn_weights_115_y_0_to_fp16)[name = string("_inversed_attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = _inversed_attn_weights_115_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; int32 var_2385 = const()[name = string("op_2385"), val = int32(2)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_2385, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool var_2391_transpose_x_1 = const()[name = string("op_2391_transpose_x_1"), val = bool(true)]; bool var_2391_transpose_y_1 = const()[name = string("op_2391_transpose_y_1"), val = bool(false)]; tensor var_2391_cast_fp16 = matmul(transpose_x = var_2391_transpose_x_1, transpose_y = var_2391_transpose_y_1, x = attn_weights_119_cast_fp16, y = var_2375_cast_fp16_0)[name = string("op_2391_cast_fp16")]; bool attn_weights_121_transpose_x_0 = const()[name = string("attn_weights_121_transpose_x_0"), val = bool(false)]; bool attn_weights_121_transpose_y_0 = const()[name = string("attn_weights_121_transpose_y_0"), val = bool(false)]; tensor attn_weights_121_cast_fp16 = matmul(transpose_x = attn_weights_121_transpose_x_0, transpose_y = attn_weights_121_transpose_y_0, x = var_2365_cast_fp16_1, y = var_2378_cast_fp16_1)[name = string("attn_weights_121_cast_fp16")]; fp16 _inversed_attn_weights_123_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_123_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_123_cast_fp16 = mul(x = attn_weights_121_cast_fp16, y = _inversed_attn_weights_123_y_0_to_fp16)[name = string("_inversed_attn_weights_123_cast_fp16")]; tensor attn_weights_125_cast_fp16 = add(x = _inversed_attn_weights_123_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(2)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_2397, x = attn_weights_125_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_43_transpose_x_1 = const()[name = string("attn_output_43_transpose_x_1"), val = bool(true)]; bool attn_output_43_transpose_y_1 = const()[name = string("attn_output_43_transpose_y_1"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_1, transpose_y = attn_output_43_transpose_y_1, x = attn_weights_127_cast_fp16, y = var_2375_cast_fp16_1)[name = string("attn_output_43_cast_fp16")]; int32 var_2405 = const()[name = string("op_2405"), val = int32(1)]; bool attn_output_45_interleave_0 = const()[name = string("attn_output_45_interleave_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = concat(axis = var_2405, interleave = attn_output_45_interleave_0, values = (var_2391_cast_fp16, attn_output_43_cast_fp16))[name = string("attn_output_45_cast_fp16")]; tensor var_2409_perm_0 = const()[name = string("op_2409_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2414 = const()[name = string("op_2414"), val = tensor([1, 1024, 1, 8])]; tensor var_2409_cast_fp16 = transpose(perm = var_2409_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor x_131_cast_fp16 = reshape(shape = var_2414, x = var_2409_cast_fp16)[name = string("x_131_cast_fp16")]; string hidden_states_45_pad_type_0 = const()[name = string("hidden_states_45_pad_type_0"), val = string("valid")]; tensor hidden_states_45_strides_0 = const()[name = string("hidden_states_45_strides_0"), val = tensor([1, 1])]; tensor hidden_states_45_pad_0 = const()[name = string("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_45_dilations_0 = const()[name = string("hidden_states_45_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_45_groups_0 = const()[name = string("hidden_states_45_groups_0"), val = int32(1)]; tensor var_2421_to_fp16 = const()[name = string("op_2421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279296)))]; tensor hidden_states_45_cast_fp16 = conv(dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = var_2421_to_fp16, x = x_131_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor x_133_cast_fp16 = add(x = x_125_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("x_133_cast_fp16")]; int32 var_2433 = const()[name = string("op_2433"), val = int32(1)]; fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2436_cast_fp16 = mul(x = x_133_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; bool x_135_interleave_0 = const()[name = string("x_135_interleave_0"), val = bool(false)]; tensor x_135_cast_fp16 = concat(axis = var_2433, interleave = x_135_interleave_0, values = (x_133_cast_fp16, var_2436_cast_fp16))[name = string("x_135_cast_fp16")]; tensor out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor([1])]; fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2446_to_fp16, x = x_135_cast_fp16)[name = string("out_91_cast_fp16")]; tensor layer_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376512)))]; tensor out_93_cast_fp16 = mul(x = out_91_cast_fp16, y = layer_layers_7_post_attention_layernorm_weight_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_2452_split_sizes_0 = const()[name = string("op_2452_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2452_axis_0 = const()[name = string("op_2452_axis_0"), val = int32(1)]; tensor var_2452_cast_fp16_0, tensor var_2452_cast_fp16_1 = split(axis = var_2452_axis_0, split_sizes = var_2452_split_sizes_0, x = out_93_cast_fp16)[name = string("op_2452_cast_fp16")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222380672)))]; tensor input_15_cast_fp16 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = var_2457_to_fp16, x = var_2452_cast_fp16_0)[name = string("input_15_cast_fp16")]; tensor var_2468_cast_fp16 = silu(x = input_15_cast_fp16)[name = string("op_2468_cast_fp16")]; string var_2473_pad_type_0 = const()[name = string("op_2473_pad_type_0"), val = string("valid")]; tensor var_2473_strides_0 = const()[name = string("op_2473_strides_0"), val = tensor([1, 1])]; tensor var_2473_pad_0 = const()[name = string("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2473_dilations_0 = const()[name = string("op_2473_dilations_0"), val = tensor([1, 1])]; int32 var_2473_groups_0 = const()[name = string("op_2473_groups_0"), val = int32(1)]; tensor var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230769344)))]; tensor var_2473_cast_fp16 = conv(dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = var_2456_to_fp16, x = var_2452_cast_fp16_0)[name = string("op_2473_cast_fp16")]; tensor x_141_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2473_cast_fp16)[name = string("x_141_cast_fp16")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239158016)))]; tensor hidden_states_47_cast_fp16 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = var_2455_to_fp16, x = x_141_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor x_143_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("x_143_cast_fp16")]; int32 var_2486 = const()[name = string("op_2486"), val = int32(1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2489_cast_fp16 = mul(x = x_143_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; bool x_145_interleave_0 = const()[name = string("x_145_interleave_0"), val = bool(false)]; tensor x_145_cast_fp16 = concat(axis = var_2486, interleave = x_145_interleave_0, values = (x_143_cast_fp16, var_2489_cast_fp16))[name = string("x_145_cast_fp16")]; tensor out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor([1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_2499_to_fp16, x = x_145_cast_fp16)[name = string("out_97_cast_fp16")]; tensor layer_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247546688)))]; tensor out_99_cast_fp16 = mul(x = out_97_cast_fp16, y = layer_layers_8_input_layernorm_weight_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_2505_split_sizes_0 = const()[name = string("op_2505_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2505_axis_0 = const()[name = string("op_2505_axis_0"), val = int32(1)]; tensor var_2505_cast_fp16_0, tensor var_2505_cast_fp16_1 = split(axis = var_2505_axis_0, split_sizes = var_2505_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2505_cast_fp16")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247550848)))]; tensor query_states_33_cast_fp16 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = var_2527_to_fp16, x = var_2505_cast_fp16_0)[name = string("query_states_33_cast_fp16")]; string key_states_33_pad_type_0 = const()[name = string("key_states_33_pad_type_0"), val = string("valid")]; tensor key_states_33_strides_0 = const()[name = string("key_states_33_strides_0"), val = tensor([1, 1])]; tensor key_states_33_pad_0 = const()[name = string("key_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_33_dilations_0 = const()[name = string("key_states_33_dilations_0"), val = tensor([1, 1])]; int32 key_states_33_groups_0 = const()[name = string("key_states_33_groups_0"), val = int32(1)]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249648064)))]; tensor key_states_33_cast_fp16 = conv(dilations = key_states_33_dilations_0, groups = key_states_33_groups_0, pad = key_states_33_pad_0, pad_type = key_states_33_pad_type_0, strides = key_states_33_strides_0, weight = var_2538_to_fp16, x = var_2505_cast_fp16_0)[name = string("key_states_33_cast_fp16")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249910272)))]; tensor value_states_33_cast_fp16 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = var_2549_to_fp16, x = var_2505_cast_fp16_0)[name = string("value_states_33_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 16, 64, 8])]; tensor embed_33_cast_fp16 = reshape(shape = var_2557, x = query_states_33_cast_fp16)[name = string("embed_33_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 2, 64, 8])]; tensor var_2562_cast_fp16 = reshape(shape = var_2561, x = key_states_33_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor embed_35_perm_0 = const()[name = string("embed_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2568 = const()[name = string("op_2568"), val = tensor([1, 2, 64, 8])]; tensor var_2569_cast_fp16 = reshape(shape = var_2568, x = value_states_33_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor value_states_35_perm_0 = const()[name = string("value_states_35_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2573_cast_fp16 = mul(x = embed_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor var_2574_split_sizes_0 = const()[name = string("op_2574_split_sizes_0"), val = tensor([32, 32])]; int32 var_2574_axis_0 = const()[name = string("op_2574_axis_0"), val = int32(-2)]; tensor var_2574_cast_fp16_0, tensor var_2574_cast_fp16_1 = split(axis = var_2574_axis_0, split_sizes = var_2574_split_sizes_0, x = embed_33_cast_fp16)[name = string("op_2574_cast_fp16")]; fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2576_cast_fp16 = mul(x = var_2574_cast_fp16_1, y = const_87_promoted_to_fp16)[name = string("op_2576_cast_fp16")]; int32 var_2578 = const()[name = string("op_2578"), val = int32(-2)]; bool var_2579_interleave_0 = const()[name = string("op_2579_interleave_0"), val = bool(false)]; tensor var_2579_cast_fp16 = concat(axis = var_2578, interleave = var_2579_interleave_0, values = (var_2576_cast_fp16, var_2574_cast_fp16_0))[name = string("op_2579_cast_fp16")]; tensor var_2580_cast_fp16 = mul(x = var_2579_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2580_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor embed_35_cast_fp16 = transpose(perm = embed_35_perm_0, x = var_2562_cast_fp16)[name = string("transpose_47")]; tensor var_2583_cast_fp16 = mul(x = embed_35_cast_fp16, y = cos_cast_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2584_split_sizes_0 = const()[name = string("op_2584_split_sizes_0"), val = tensor([32, 32])]; int32 var_2584_axis_0 = const()[name = string("op_2584_axis_0"), val = int32(-1)]; tensor var_2584_cast_fp16_0, tensor var_2584_cast_fp16_1 = split(axis = var_2584_axis_0, split_sizes = var_2584_split_sizes_0, x = embed_35_cast_fp16)[name = string("op_2584_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2586_cast_fp16 = mul(x = var_2584_cast_fp16_1, y = const_88_promoted_to_fp16)[name = string("op_2586_cast_fp16")]; int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; tensor var_2589_cast_fp16 = concat(axis = var_2588, interleave = var_2589_interleave_0, values = (var_2586_cast_fp16, var_2584_cast_fp16_0))[name = string("op_2589_cast_fp16")]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_cast_fp16)[name = string("op_2590_cast_fp16")]; tensor key_states_35_cast_fp16 = add(x = var_2583_cast_fp16, y = var_2590_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([8])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([9])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_81, expand_dims_82, position_id, concat_67_values3_0))[name = string("concat_67")]; tensor concat_68_values1_0 = const()[name = string("concat_68_values1_0"), val = tensor([0])]; tensor concat_68_values3_0 = const()[name = string("concat_68_values3_0"), val = tensor([0])]; int32 concat_68_axis_0 = const()[name = string("concat_68_axis_0"), val = int32(0)]; bool concat_68_interleave_0 = const()[name = string("concat_68_interleave_0"), val = bool(false)]; tensor concat_68 = concat(axis = concat_68_axis_0, interleave = concat_68_interleave_0, values = (expand_dims_84, concat_68_values1_0, var_426, concat_68_values3_0))[name = string("concat_68")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = key_states_35_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_230_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_230")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35_cast_fp16 = transpose(perm = value_states_35_perm_0, x = var_2569_cast_fp16)[name = string("transpose_46")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_67, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_68, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = value_states_35_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_231_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_231")]; tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2633_cast_fp16 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, x = coreml_update_state_64)[name = string("op_2633_cast_fp16")]; tensor tile_16 = const()[name = string("tile_16"), val = tensor([1, 1])]; int32 var_2636_axis_0 = const()[name = string("op_2636_axis_0"), val = int32(1)]; tensor var_2636_cast_fp16_0, tensor var_2636_cast_fp16_1 = split(axis = var_2636_axis_0, split_sizes = tile_16, x = var_2633_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor var_2643_begin_0 = const()[name = string("op_2643_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2643_end_0 = const()[name = string("op_2643_end_0"), val = tensor([9, 2, 2048, 64])]; tensor var_2643_end_mask_0 = const()[name = string("op_2643_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = coreml_update_state_65)[name = string("op_2643_cast_fp16")]; tensor tile_17 = const()[name = string("tile_17"), val = tensor([1, 1])]; int32 var_2646_axis_0 = const()[name = string("op_2646_axis_0"), val = int32(1)]; tensor var_2646_cast_fp16_0, tensor var_2646_cast_fp16_1 = split(axis = var_2646_axis_0, split_sizes = tile_17, x = var_2643_cast_fp16)[name = string("op_2646_cast_fp16")]; tensor var_2649_split_sizes_0 = const()[name = string("op_2649_split_sizes_0"), val = tensor([8, 8])]; int32 var_2649_axis_0 = const()[name = string("op_2649_axis_0"), val = int32(1)]; tensor var_2649_cast_fp16_0, tensor var_2649_cast_fp16_1 = split(axis = var_2649_axis_0, split_sizes = var_2649_split_sizes_0, x = query_states_35_cast_fp16)[name = string("op_2649_cast_fp16")]; bool attn_weights_129_transpose_x_0 = const()[name = string("attn_weights_129_transpose_x_0"), val = bool(false)]; bool attn_weights_129_transpose_y_0 = const()[name = string("attn_weights_129_transpose_y_0"), val = bool(false)]; tensor attn_weights_129_cast_fp16 = matmul(transpose_x = attn_weights_129_transpose_x_0, transpose_y = attn_weights_129_transpose_y_0, x = var_2636_cast_fp16_0, y = var_2649_cast_fp16_0)[name = string("attn_weights_129_cast_fp16")]; fp16 _inversed_attn_weights_131_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_131_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_131_cast_fp16 = mul(x = attn_weights_129_cast_fp16, y = _inversed_attn_weights_131_y_0_to_fp16)[name = string("_inversed_attn_weights_131_cast_fp16")]; tensor attn_weights_133_cast_fp16 = add(x = _inversed_attn_weights_131_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_133_cast_fp16")]; int32 var_2656 = const()[name = string("op_2656"), val = int32(2)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_2656, x = attn_weights_133_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool var_2662_transpose_x_1 = const()[name = string("op_2662_transpose_x_1"), val = bool(true)]; bool var_2662_transpose_y_1 = const()[name = string("op_2662_transpose_y_1"), val = bool(false)]; tensor var_2662_cast_fp16 = matmul(transpose_x = var_2662_transpose_x_1, transpose_y = var_2662_transpose_y_1, x = attn_weights_135_cast_fp16, y = var_2646_cast_fp16_0)[name = string("op_2662_cast_fp16")]; bool attn_weights_137_transpose_x_0 = const()[name = string("attn_weights_137_transpose_x_0"), val = bool(false)]; bool attn_weights_137_transpose_y_0 = const()[name = string("attn_weights_137_transpose_y_0"), val = bool(false)]; tensor attn_weights_137_cast_fp16 = matmul(transpose_x = attn_weights_137_transpose_x_0, transpose_y = attn_weights_137_transpose_y_0, x = var_2636_cast_fp16_1, y = var_2649_cast_fp16_1)[name = string("attn_weights_137_cast_fp16")]; fp16 _inversed_attn_weights_139_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_139_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_139_cast_fp16 = mul(x = attn_weights_137_cast_fp16, y = _inversed_attn_weights_139_y_0_to_fp16)[name = string("_inversed_attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = _inversed_attn_weights_139_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; int32 var_2668 = const()[name = string("op_2668"), val = int32(2)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_2668, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(true)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_143_cast_fp16, y = var_2646_cast_fp16_1)[name = string("attn_output_49_cast_fp16")]; int32 var_2676 = const()[name = string("op_2676"), val = int32(1)]; bool attn_output_51_interleave_0 = const()[name = string("attn_output_51_interleave_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = concat(axis = var_2676, interleave = attn_output_51_interleave_0, values = (var_2662_cast_fp16, attn_output_49_cast_fp16))[name = string("attn_output_51_cast_fp16")]; tensor var_2680_perm_0 = const()[name = string("op_2680_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 1024, 1, 8])]; tensor var_2680_cast_fp16 = transpose(perm = var_2680_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_45")]; tensor x_149_cast_fp16 = reshape(shape = var_2685, x = var_2680_cast_fp16)[name = string("x_149_cast_fp16")]; string hidden_states_51_pad_type_0 = const()[name = string("hidden_states_51_pad_type_0"), val = string("valid")]; tensor hidden_states_51_strides_0 = const()[name = string("hidden_states_51_strides_0"), val = tensor([1, 1])]; tensor hidden_states_51_pad_0 = const()[name = string("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_51_dilations_0 = const()[name = string("hidden_states_51_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_51_groups_0 = const()[name = string("hidden_states_51_groups_0"), val = int32(1)]; tensor var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250172480)))]; tensor hidden_states_51_cast_fp16 = conv(dilations = hidden_states_51_dilations_0, groups = hidden_states_51_groups_0, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = hidden_states_51_strides_0, weight = var_2692_to_fp16, x = x_149_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor x_151_cast_fp16 = add(x = x_143_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("x_151_cast_fp16")]; int32 var_2704 = const()[name = string("op_2704"), val = int32(1)]; fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2707_cast_fp16 = mul(x = x_151_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_2707_cast_fp16")]; bool x_153_interleave_0 = const()[name = string("x_153_interleave_0"), val = bool(false)]; tensor x_153_cast_fp16 = concat(axis = var_2704, interleave = x_153_interleave_0, values = (x_151_cast_fp16, var_2707_cast_fp16))[name = string("x_153_cast_fp16")]; tensor out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor([1])]; fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_2717_to_fp16, x = x_153_cast_fp16)[name = string("out_103_cast_fp16")]; tensor layer_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252269696)))]; tensor out_105_cast_fp16 = mul(x = out_103_cast_fp16, y = layer_layers_8_post_attention_layernorm_weight_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2723_split_sizes_0 = const()[name = string("op_2723_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2723_axis_0 = const()[name = string("op_2723_axis_0"), val = int32(1)]; tensor var_2723_cast_fp16_0, tensor var_2723_cast_fp16_1 = split(axis = var_2723_axis_0, split_sizes = var_2723_split_sizes_0, x = out_105_cast_fp16)[name = string("op_2723_cast_fp16")]; string input_17_pad_type_0 = const()[name = string("input_17_pad_type_0"), val = string("valid")]; tensor input_17_strides_0 = const()[name = string("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = string("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = string("input_17_dilations_0"), val = tensor([1, 1])]; int32 input_17_groups_0 = const()[name = string("input_17_groups_0"), val = int32(1)]; tensor var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252273856)))]; tensor input_17_cast_fp16 = conv(dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = var_2728_to_fp16, x = var_2723_cast_fp16_0)[name = string("input_17_cast_fp16")]; tensor var_2739_cast_fp16 = silu(x = input_17_cast_fp16)[name = string("op_2739_cast_fp16")]; string var_2744_pad_type_0 = const()[name = string("op_2744_pad_type_0"), val = string("valid")]; tensor var_2744_strides_0 = const()[name = string("op_2744_strides_0"), val = tensor([1, 1])]; tensor var_2744_pad_0 = const()[name = string("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2744_dilations_0 = const()[name = string("op_2744_dilations_0"), val = tensor([1, 1])]; int32 var_2744_groups_0 = const()[name = string("op_2744_groups_0"), val = int32(1)]; tensor var_2727_to_fp16 = const()[name = string("op_2727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260662528)))]; tensor var_2744_cast_fp16 = conv(dilations = var_2744_dilations_0, groups = var_2744_groups_0, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2744_strides_0, weight = var_2727_to_fp16, x = var_2723_cast_fp16_0)[name = string("op_2744_cast_fp16")]; tensor x_159_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2744_cast_fp16)[name = string("x_159_cast_fp16")]; string hidden_states_53_pad_type_0 = const()[name = string("hidden_states_53_pad_type_0"), val = string("valid")]; tensor hidden_states_53_strides_0 = const()[name = string("hidden_states_53_strides_0"), val = tensor([1, 1])]; tensor hidden_states_53_pad_0 = const()[name = string("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_53_dilations_0 = const()[name = string("hidden_states_53_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_53_groups_0 = const()[name = string("hidden_states_53_groups_0"), val = int32(1)]; tensor var_2726_to_fp16 = const()[name = string("op_2726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269051200)))]; tensor hidden_states_53_cast_fp16 = conv(dilations = hidden_states_53_dilations_0, groups = hidden_states_53_groups_0, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = hidden_states_53_strides_0, weight = var_2726_to_fp16, x = x_159_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_151_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_2757 = const()[name = string("op_2757"), val = int32(1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2760_cast_fp16 = mul(x = x_161_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_2760_cast_fp16")]; bool x_163_interleave_0 = const()[name = string("x_163_interleave_0"), val = bool(false)]; tensor x_163_cast_fp16 = concat(axis = var_2757, interleave = x_163_interleave_0, values = (x_161_cast_fp16, var_2760_cast_fp16))[name = string("x_163_cast_fp16")]; tensor out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor([1])]; fp16 var_2770_to_fp16 = const()[name = string("op_2770_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_2770_to_fp16, x = x_163_cast_fp16)[name = string("out_109_cast_fp16")]; tensor layer_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277439872)))]; tensor out_111_cast_fp16 = mul(x = out_109_cast_fp16, y = layer_layers_9_input_layernorm_weight_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_2776_split_sizes_0 = const()[name = string("op_2776_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2776_axis_0 = const()[name = string("op_2776_axis_0"), val = int32(1)]; tensor var_2776_cast_fp16_0, tensor var_2776_cast_fp16_1 = split(axis = var_2776_axis_0, split_sizes = var_2776_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2776_cast_fp16")]; string query_states_37_pad_type_0 = const()[name = string("query_states_37_pad_type_0"), val = string("valid")]; tensor query_states_37_strides_0 = const()[name = string("query_states_37_strides_0"), val = tensor([1, 1])]; tensor query_states_37_pad_0 = const()[name = string("query_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_37_dilations_0 = const()[name = string("query_states_37_dilations_0"), val = tensor([1, 1])]; int32 query_states_37_groups_0 = const()[name = string("query_states_37_groups_0"), val = int32(1)]; tensor var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277444032)))]; tensor query_states_37_cast_fp16 = conv(dilations = query_states_37_dilations_0, groups = query_states_37_groups_0, pad = query_states_37_pad_0, pad_type = query_states_37_pad_type_0, strides = query_states_37_strides_0, weight = var_2798_to_fp16, x = var_2776_cast_fp16_0)[name = string("query_states_37_cast_fp16")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279541248)))]; tensor key_states_37_cast_fp16 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = var_2809_to_fp16, x = var_2776_cast_fp16_0)[name = string("key_states_37_cast_fp16")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor var_2820_to_fp16 = const()[name = string("op_2820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279803456)))]; tensor value_states_37_cast_fp16 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = var_2820_to_fp16, x = var_2776_cast_fp16_0)[name = string("value_states_37_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 16, 64, 8])]; tensor embed_37_cast_fp16 = reshape(shape = var_2828, x = query_states_37_cast_fp16)[name = string("embed_37_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 64, 8])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = key_states_37_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor embed_39_perm_0 = const()[name = string("embed_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([1, 2, 64, 8])]; tensor var_2840_cast_fp16 = reshape(shape = var_2839, x = value_states_37_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor value_states_39_perm_0 = const()[name = string("value_states_39_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2844_cast_fp16 = mul(x = embed_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor var_2845_split_sizes_0 = const()[name = string("op_2845_split_sizes_0"), val = tensor([32, 32])]; int32 var_2845_axis_0 = const()[name = string("op_2845_axis_0"), val = int32(-2)]; tensor var_2845_cast_fp16_0, tensor var_2845_cast_fp16_1 = split(axis = var_2845_axis_0, split_sizes = var_2845_split_sizes_0, x = embed_37_cast_fp16)[name = string("op_2845_cast_fp16")]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = var_2845_cast_fp16_1, y = const_97_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; int32 var_2849 = const()[name = string("op_2849"), val = int32(-2)]; bool var_2850_interleave_0 = const()[name = string("op_2850_interleave_0"), val = bool(false)]; tensor var_2850_cast_fp16 = concat(axis = var_2849, interleave = var_2850_interleave_0, values = (var_2847_cast_fp16, var_2845_cast_fp16_0))[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = mul(x = var_2850_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2851_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor embed_39_cast_fp16 = transpose(perm = embed_39_perm_0, x = var_2833_cast_fp16)[name = string("transpose_44")]; tensor var_2854_cast_fp16 = mul(x = embed_39_cast_fp16, y = cos_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2855_split_sizes_0 = const()[name = string("op_2855_split_sizes_0"), val = tensor([32, 32])]; int32 var_2855_axis_0 = const()[name = string("op_2855_axis_0"), val = int32(-1)]; tensor var_2855_cast_fp16_0, tensor var_2855_cast_fp16_1 = split(axis = var_2855_axis_0, split_sizes = var_2855_split_sizes_0, x = embed_39_cast_fp16)[name = string("op_2855_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2857_cast_fp16 = mul(x = var_2855_cast_fp16_1, y = const_98_promoted_to_fp16)[name = string("op_2857_cast_fp16")]; int32 var_2859 = const()[name = string("op_2859"), val = int32(-1)]; bool var_2860_interleave_0 = const()[name = string("op_2860_interleave_0"), val = bool(false)]; tensor var_2860_cast_fp16 = concat(axis = var_2859, interleave = var_2860_interleave_0, values = (var_2857_cast_fp16, var_2855_cast_fp16_0))[name = string("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = mul(x = var_2860_cast_fp16, y = sin_cast_fp16)[name = string("op_2861_cast_fp16")]; tensor key_states_39_cast_fp16 = add(x = var_2854_cast_fp16, y = var_2861_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([9])]; tensor expand_dims_92 = const()[name = string("expand_dims_92"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([10])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_91, expand_dims_92, position_id, concat_75_values3_0))[name = string("concat_75")]; tensor concat_76_values1_0 = const()[name = string("concat_76_values1_0"), val = tensor([0])]; tensor concat_76_values3_0 = const()[name = string("concat_76_values3_0"), val = tensor([0])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_94, concat_76_values1_0, var_426, concat_76_values3_0))[name = string("concat_76")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = key_states_39_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_232_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_232")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39_cast_fp16 = transpose(perm = value_states_39_perm_0, x = var_2840_cast_fp16)[name = string("transpose_43")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_75, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_76, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = value_states_39_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_233_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_233")]; tensor var_2904_begin_0 = const()[name = string("op_2904_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = string("op_2904_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2904_end_mask_0 = const()[name = string("op_2904_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = coreml_update_state_66)[name = string("op_2904_cast_fp16")]; tensor tile_18 = const()[name = string("tile_18"), val = tensor([1, 1])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = tile_18, x = var_2904_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2914_begin_0 = const()[name = string("op_2914_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = string("op_2914_end_0"), val = tensor([10, 2, 2048, 64])]; tensor var_2914_end_mask_0 = const()[name = string("op_2914_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = coreml_update_state_67)[name = string("op_2914_cast_fp16")]; tensor tile_19 = const()[name = string("tile_19"), val = tensor([1, 1])]; int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(1)]; tensor var_2917_cast_fp16_0, tensor var_2917_cast_fp16_1 = split(axis = var_2917_axis_0, split_sizes = tile_19, x = var_2914_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor var_2920_split_sizes_0 = const()[name = string("op_2920_split_sizes_0"), val = tensor([8, 8])]; int32 var_2920_axis_0 = const()[name = string("op_2920_axis_0"), val = int32(1)]; tensor var_2920_cast_fp16_0, tensor var_2920_cast_fp16_1 = split(axis = var_2920_axis_0, split_sizes = var_2920_split_sizes_0, x = query_states_39_cast_fp16)[name = string("op_2920_cast_fp16")]; bool attn_weights_145_transpose_x_0 = const()[name = string("attn_weights_145_transpose_x_0"), val = bool(false)]; bool attn_weights_145_transpose_y_0 = const()[name = string("attn_weights_145_transpose_y_0"), val = bool(false)]; tensor attn_weights_145_cast_fp16 = matmul(transpose_x = attn_weights_145_transpose_x_0, transpose_y = attn_weights_145_transpose_y_0, x = var_2907_cast_fp16_0, y = var_2920_cast_fp16_0)[name = string("attn_weights_145_cast_fp16")]; fp16 _inversed_attn_weights_147_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_147_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_147_cast_fp16 = mul(x = attn_weights_145_cast_fp16, y = _inversed_attn_weights_147_y_0_to_fp16)[name = string("_inversed_attn_weights_147_cast_fp16")]; tensor attn_weights_149_cast_fp16 = add(x = _inversed_attn_weights_147_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; int32 var_2927 = const()[name = string("op_2927"), val = int32(2)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_2927, x = attn_weights_149_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool var_2933_transpose_x_1 = const()[name = string("op_2933_transpose_x_1"), val = bool(true)]; bool var_2933_transpose_y_1 = const()[name = string("op_2933_transpose_y_1"), val = bool(false)]; tensor var_2933_cast_fp16 = matmul(transpose_x = var_2933_transpose_x_1, transpose_y = var_2933_transpose_y_1, x = attn_weights_151_cast_fp16, y = var_2917_cast_fp16_0)[name = string("op_2933_cast_fp16")]; bool attn_weights_153_transpose_x_0 = const()[name = string("attn_weights_153_transpose_x_0"), val = bool(false)]; bool attn_weights_153_transpose_y_0 = const()[name = string("attn_weights_153_transpose_y_0"), val = bool(false)]; tensor attn_weights_153_cast_fp16 = matmul(transpose_x = attn_weights_153_transpose_x_0, transpose_y = attn_weights_153_transpose_y_0, x = var_2907_cast_fp16_1, y = var_2920_cast_fp16_1)[name = string("attn_weights_153_cast_fp16")]; fp16 _inversed_attn_weights_155_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_155_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_155_cast_fp16 = mul(x = attn_weights_153_cast_fp16, y = _inversed_attn_weights_155_y_0_to_fp16)[name = string("_inversed_attn_weights_155_cast_fp16")]; tensor attn_weights_157_cast_fp16 = add(x = _inversed_attn_weights_155_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_157_cast_fp16")]; int32 var_2939 = const()[name = string("op_2939"), val = int32(2)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_2939, x = attn_weights_157_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_55_transpose_x_1 = const()[name = string("attn_output_55_transpose_x_1"), val = bool(true)]; bool attn_output_55_transpose_y_1 = const()[name = string("attn_output_55_transpose_y_1"), val = bool(false)]; tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_1, transpose_y = attn_output_55_transpose_y_1, x = attn_weights_159_cast_fp16, y = var_2917_cast_fp16_1)[name = string("attn_output_55_cast_fp16")]; int32 var_2947 = const()[name = string("op_2947"), val = int32(1)]; bool attn_output_57_interleave_0 = const()[name = string("attn_output_57_interleave_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = concat(axis = var_2947, interleave = attn_output_57_interleave_0, values = (var_2933_cast_fp16, attn_output_55_cast_fp16))[name = string("attn_output_57_cast_fp16")]; tensor var_2951_perm_0 = const()[name = string("op_2951_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1024, 1, 8])]; tensor var_2951_cast_fp16 = transpose(perm = var_2951_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; tensor x_167_cast_fp16 = reshape(shape = var_2956, x = var_2951_cast_fp16)[name = string("x_167_cast_fp16")]; string hidden_states_57_pad_type_0 = const()[name = string("hidden_states_57_pad_type_0"), val = string("valid")]; tensor hidden_states_57_strides_0 = const()[name = string("hidden_states_57_strides_0"), val = tensor([1, 1])]; tensor hidden_states_57_pad_0 = const()[name = string("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_57_dilations_0 = const()[name = string("hidden_states_57_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_57_groups_0 = const()[name = string("hidden_states_57_groups_0"), val = int32(1)]; tensor var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280065664)))]; tensor hidden_states_57_cast_fp16 = conv(dilations = hidden_states_57_dilations_0, groups = hidden_states_57_groups_0, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = hidden_states_57_strides_0, weight = var_2963_to_fp16, x = x_167_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor x_169_cast_fp16 = add(x = x_161_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("x_169_cast_fp16")]; int32 var_2975 = const()[name = string("op_2975"), val = int32(1)]; fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2978_cast_fp16 = mul(x = x_169_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; bool x_171_interleave_0 = const()[name = string("x_171_interleave_0"), val = bool(false)]; tensor x_171_cast_fp16 = concat(axis = var_2975, interleave = x_171_interleave_0, values = (x_169_cast_fp16, var_2978_cast_fp16))[name = string("x_171_cast_fp16")]; tensor out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor([1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_2988_to_fp16, x = x_171_cast_fp16)[name = string("out_115_cast_fp16")]; tensor layer_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282162880)))]; tensor out_117_cast_fp16 = mul(x = out_115_cast_fp16, y = layer_layers_9_post_attention_layernorm_weight_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2994_split_sizes_0 = const()[name = string("op_2994_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2994_axis_0 = const()[name = string("op_2994_axis_0"), val = int32(1)]; tensor var_2994_cast_fp16_0, tensor var_2994_cast_fp16_1 = split(axis = var_2994_axis_0, split_sizes = var_2994_split_sizes_0, x = out_117_cast_fp16)[name = string("op_2994_cast_fp16")]; string input_19_pad_type_0 = const()[name = string("input_19_pad_type_0"), val = string("valid")]; tensor input_19_strides_0 = const()[name = string("input_19_strides_0"), val = tensor([1, 1])]; tensor input_19_pad_0 = const()[name = string("input_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_19_dilations_0 = const()[name = string("input_19_dilations_0"), val = tensor([1, 1])]; int32 input_19_groups_0 = const()[name = string("input_19_groups_0"), val = int32(1)]; tensor var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282167040)))]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = var_2999_to_fp16, x = var_2994_cast_fp16_0)[name = string("input_19_cast_fp16")]; tensor var_3010_cast_fp16 = silu(x = input_19_cast_fp16)[name = string("op_3010_cast_fp16")]; string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1, 1])]; tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1, 1])]; int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; tensor var_2998_to_fp16 = const()[name = string("op_2998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290555712)))]; tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = var_2998_to_fp16, x = var_2994_cast_fp16_0)[name = string("op_3015_cast_fp16")]; tensor x_177_cast_fp16 = mul(x = var_3010_cast_fp16, y = var_3015_cast_fp16)[name = string("x_177_cast_fp16")]; string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; tensor var_2997_to_fp16 = const()[name = string("op_2997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944384)))]; tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = var_2997_to_fp16, x = x_177_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor x_179_cast_fp16 = add(x = x_169_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("x_179_cast_fp16")]; int32 var_3028 = const()[name = string("op_3028"), val = int32(1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3031_cast_fp16 = mul(x = x_179_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3031_cast_fp16")]; bool x_181_interleave_0 = const()[name = string("x_181_interleave_0"), val = bool(false)]; tensor x_181_cast_fp16 = concat(axis = var_3028, interleave = x_181_interleave_0, values = (x_179_cast_fp16, var_3031_cast_fp16))[name = string("x_181_cast_fp16")]; tensor out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor([1])]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_3041_to_fp16, x = x_181_cast_fp16)[name = string("out_121_cast_fp16")]; tensor layer_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307333056)))]; tensor out_123_cast_fp16 = mul(x = out_121_cast_fp16, y = layer_layers_10_input_layernorm_weight_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_3047_split_sizes_0 = const()[name = string("op_3047_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3047_axis_0 = const()[name = string("op_3047_axis_0"), val = int32(1)]; tensor var_3047_cast_fp16_0, tensor var_3047_cast_fp16_1 = split(axis = var_3047_axis_0, split_sizes = var_3047_split_sizes_0, x = out_123_cast_fp16)[name = string("op_3047_cast_fp16")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307337216)))]; tensor query_states_41_cast_fp16 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = var_3069_to_fp16, x = var_3047_cast_fp16_0)[name = string("query_states_41_cast_fp16")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309434432)))]; tensor key_states_41_cast_fp16 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = var_3080_to_fp16, x = var_3047_cast_fp16_0)[name = string("key_states_41_cast_fp16")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309696640)))]; tensor value_states_41_cast_fp16 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = var_3091_to_fp16, x = var_3047_cast_fp16_0)[name = string("value_states_41_cast_fp16")]; tensor var_3099 = const()[name = string("op_3099"), val = tensor([1, 16, 64, 8])]; tensor embed_41_cast_fp16 = reshape(shape = var_3099, x = query_states_41_cast_fp16)[name = string("embed_41_cast_fp16")]; tensor var_3103 = const()[name = string("op_3103"), val = tensor([1, 2, 64, 8])]; tensor var_3104_cast_fp16 = reshape(shape = var_3103, x = key_states_41_cast_fp16)[name = string("op_3104_cast_fp16")]; tensor embed_43_perm_0 = const()[name = string("embed_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 2, 64, 8])]; tensor var_3111_cast_fp16 = reshape(shape = var_3110, x = value_states_41_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor value_states_43_perm_0 = const()[name = string("value_states_43_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3115_cast_fp16 = mul(x = embed_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3115_cast_fp16")]; tensor var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor([32, 32])]; int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-2)]; tensor var_3116_cast_fp16_0, tensor var_3116_cast_fp16_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = embed_41_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3118_cast_fp16 = mul(x = var_3116_cast_fp16_1, y = const_107_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; int32 var_3120 = const()[name = string("op_3120"), val = int32(-2)]; bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)]; tensor var_3121_cast_fp16 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118_cast_fp16, var_3116_cast_fp16_0))[name = string("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = mul(x = var_3121_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3122_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor embed_43_cast_fp16 = transpose(perm = embed_43_perm_0, x = var_3104_cast_fp16)[name = string("transpose_41")]; tensor var_3125_cast_fp16 = mul(x = embed_43_cast_fp16, y = cos_cast_fp16)[name = string("op_3125_cast_fp16")]; tensor var_3126_split_sizes_0 = const()[name = string("op_3126_split_sizes_0"), val = tensor([32, 32])]; int32 var_3126_axis_0 = const()[name = string("op_3126_axis_0"), val = int32(-1)]; tensor var_3126_cast_fp16_0, tensor var_3126_cast_fp16_1 = split(axis = var_3126_axis_0, split_sizes = var_3126_split_sizes_0, x = embed_43_cast_fp16)[name = string("op_3126_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3128_cast_fp16 = mul(x = var_3126_cast_fp16_1, y = const_108_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; bool var_3131_interleave_0 = const()[name = string("op_3131_interleave_0"), val = bool(false)]; tensor var_3131_cast_fp16 = concat(axis = var_3130, interleave = var_3131_interleave_0, values = (var_3128_cast_fp16, var_3126_cast_fp16_0))[name = string("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = mul(x = var_3131_cast_fp16, y = sin_cast_fp16)[name = string("op_3132_cast_fp16")]; tensor key_states_43_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3132_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor expand_dims_101 = const()[name = string("expand_dims_101"), val = tensor([10])]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([0])]; tensor expand_dims_104 = const()[name = string("expand_dims_104"), val = tensor([11])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_101, expand_dims_102, position_id, concat_83_values3_0))[name = string("concat_83")]; tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (expand_dims_104, concat_84_values1_0, var_426, concat_84_values3_0))[name = string("concat_84")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_234_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_234")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43_cast_fp16 = transpose(perm = value_states_43_perm_0, x = var_3111_cast_fp16)[name = string("transpose_40")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_83, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_84, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = value_states_43_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_235_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_235")]; tensor var_3175_begin_0 = const()[name = string("op_3175_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3175_end_0 = const()[name = string("op_3175_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3175_end_mask_0 = const()[name = string("op_3175_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3175_cast_fp16 = slice_by_index(begin = var_3175_begin_0, end = var_3175_end_0, end_mask = var_3175_end_mask_0, x = coreml_update_state_68)[name = string("op_3175_cast_fp16")]; tensor tile_20 = const()[name = string("tile_20"), val = tensor([1, 1])]; int32 var_3178_axis_0 = const()[name = string("op_3178_axis_0"), val = int32(1)]; tensor var_3178_cast_fp16_0, tensor var_3178_cast_fp16_1 = split(axis = var_3178_axis_0, split_sizes = tile_20, x = var_3175_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor var_3185_begin_0 = const()[name = string("op_3185_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3185_end_0 = const()[name = string("op_3185_end_0"), val = tensor([11, 2, 2048, 64])]; tensor var_3185_end_mask_0 = const()[name = string("op_3185_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = coreml_update_state_69)[name = string("op_3185_cast_fp16")]; tensor tile_21 = const()[name = string("tile_21"), val = tensor([1, 1])]; int32 var_3188_axis_0 = const()[name = string("op_3188_axis_0"), val = int32(1)]; tensor var_3188_cast_fp16_0, tensor var_3188_cast_fp16_1 = split(axis = var_3188_axis_0, split_sizes = tile_21, x = var_3185_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor var_3191_split_sizes_0 = const()[name = string("op_3191_split_sizes_0"), val = tensor([8, 8])]; int32 var_3191_axis_0 = const()[name = string("op_3191_axis_0"), val = int32(1)]; tensor var_3191_cast_fp16_0, tensor var_3191_cast_fp16_1 = split(axis = var_3191_axis_0, split_sizes = var_3191_split_sizes_0, x = query_states_43_cast_fp16)[name = string("op_3191_cast_fp16")]; bool attn_weights_161_transpose_x_0 = const()[name = string("attn_weights_161_transpose_x_0"), val = bool(false)]; bool attn_weights_161_transpose_y_0 = const()[name = string("attn_weights_161_transpose_y_0"), val = bool(false)]; tensor attn_weights_161_cast_fp16 = matmul(transpose_x = attn_weights_161_transpose_x_0, transpose_y = attn_weights_161_transpose_y_0, x = var_3178_cast_fp16_0, y = var_3191_cast_fp16_0)[name = string("attn_weights_161_cast_fp16")]; fp16 _inversed_attn_weights_163_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_163_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_163_cast_fp16 = mul(x = attn_weights_161_cast_fp16, y = _inversed_attn_weights_163_y_0_to_fp16)[name = string("_inversed_attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = _inversed_attn_weights_163_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_165_cast_fp16")]; int32 var_3198 = const()[name = string("op_3198"), val = int32(2)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_3198, x = attn_weights_165_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool var_3204_transpose_x_1 = const()[name = string("op_3204_transpose_x_1"), val = bool(true)]; bool var_3204_transpose_y_1 = const()[name = string("op_3204_transpose_y_1"), val = bool(false)]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_1, transpose_y = var_3204_transpose_y_1, x = attn_weights_167_cast_fp16, y = var_3188_cast_fp16_0)[name = string("op_3204_cast_fp16")]; bool attn_weights_169_transpose_x_0 = const()[name = string("attn_weights_169_transpose_x_0"), val = bool(false)]; bool attn_weights_169_transpose_y_0 = const()[name = string("attn_weights_169_transpose_y_0"), val = bool(false)]; tensor attn_weights_169_cast_fp16 = matmul(transpose_x = attn_weights_169_transpose_x_0, transpose_y = attn_weights_169_transpose_y_0, x = var_3178_cast_fp16_1, y = var_3191_cast_fp16_1)[name = string("attn_weights_169_cast_fp16")]; fp16 _inversed_attn_weights_171_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_171_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_171_cast_fp16 = mul(x = attn_weights_169_cast_fp16, y = _inversed_attn_weights_171_y_0_to_fp16)[name = string("_inversed_attn_weights_171_cast_fp16")]; tensor attn_weights_173_cast_fp16 = add(x = _inversed_attn_weights_171_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_173_cast_fp16")]; int32 var_3210 = const()[name = string("op_3210"), val = int32(2)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_3210, x = attn_weights_173_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(true)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_175_cast_fp16, y = var_3188_cast_fp16_1)[name = string("attn_output_61_cast_fp16")]; int32 var_3218 = const()[name = string("op_3218"), val = int32(1)]; bool attn_output_63_interleave_0 = const()[name = string("attn_output_63_interleave_0"), val = bool(false)]; tensor attn_output_63_cast_fp16 = concat(axis = var_3218, interleave = attn_output_63_interleave_0, values = (var_3204_cast_fp16, attn_output_61_cast_fp16))[name = string("attn_output_63_cast_fp16")]; tensor var_3222_perm_0 = const()[name = string("op_3222_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3227 = const()[name = string("op_3227"), val = tensor([1, 1024, 1, 8])]; tensor var_3222_cast_fp16 = transpose(perm = var_3222_perm_0, x = attn_output_63_cast_fp16)[name = string("transpose_39")]; tensor x_185_cast_fp16 = reshape(shape = var_3227, x = var_3222_cast_fp16)[name = string("x_185_cast_fp16")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309958848)))]; tensor hidden_states_63_cast_fp16 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = var_3234_to_fp16, x = x_185_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_179_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(1)]; fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3249_cast_fp16 = mul(x = x_187_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_3249_cast_fp16")]; bool x_189_interleave_0 = const()[name = string("x_189_interleave_0"), val = bool(false)]; tensor x_189_cast_fp16 = concat(axis = var_3246, interleave = x_189_interleave_0, values = (x_187_cast_fp16, var_3249_cast_fp16))[name = string("x_189_cast_fp16")]; tensor out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor([1])]; fp16 var_3259_to_fp16 = const()[name = string("op_3259_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_3259_to_fp16, x = x_189_cast_fp16)[name = string("out_127_cast_fp16")]; tensor layer_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312056064)))]; tensor out_129_cast_fp16 = mul(x = out_127_cast_fp16, y = layer_layers_10_post_attention_layernorm_weight_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_3265_split_sizes_0 = const()[name = string("op_3265_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3265_axis_0 = const()[name = string("op_3265_axis_0"), val = int32(1)]; tensor var_3265_cast_fp16_0, tensor var_3265_cast_fp16_1 = split(axis = var_3265_axis_0, split_sizes = var_3265_split_sizes_0, x = out_129_cast_fp16)[name = string("op_3265_cast_fp16")]; string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312060224)))]; tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = var_3270_to_fp16, x = var_3265_cast_fp16_0)[name = string("input_21_cast_fp16")]; tensor var_3281_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_3281_cast_fp16")]; string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")]; tensor var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor([1, 1])]; tensor var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor([1, 1])]; int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)]; tensor var_3269_to_fp16 = const()[name = string("op_3269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320448896)))]; tensor var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = var_3269_to_fp16, x = var_3265_cast_fp16_0)[name = string("op_3286_cast_fp16")]; tensor x_195_cast_fp16 = mul(x = var_3281_cast_fp16, y = var_3286_cast_fp16)[name = string("x_195_cast_fp16")]; string hidden_states_65_pad_type_0 = const()[name = string("hidden_states_65_pad_type_0"), val = string("valid")]; tensor hidden_states_65_strides_0 = const()[name = string("hidden_states_65_strides_0"), val = tensor([1, 1])]; tensor hidden_states_65_pad_0 = const()[name = string("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_65_dilations_0 = const()[name = string("hidden_states_65_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_65_groups_0 = const()[name = string("hidden_states_65_groups_0"), val = int32(1)]; tensor var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328837568)))]; tensor hidden_states_65_cast_fp16 = conv(dilations = hidden_states_65_dilations_0, groups = hidden_states_65_groups_0, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = hidden_states_65_strides_0, weight = var_3268_to_fp16, x = x_195_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor x_197_cast_fp16 = add(x = x_187_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("x_197_cast_fp16")]; int32 var_3299 = const()[name = string("op_3299"), val = int32(1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3302_cast_fp16 = mul(x = x_197_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3302_cast_fp16")]; bool x_199_interleave_0 = const()[name = string("x_199_interleave_0"), val = bool(false)]; tensor x_199_cast_fp16 = concat(axis = var_3299, interleave = x_199_interleave_0, values = (x_197_cast_fp16, var_3302_cast_fp16))[name = string("x_199_cast_fp16")]; tensor out_133_axes_0 = const()[name = string("out_133_axes_0"), val = tensor([1])]; fp16 var_3312_to_fp16 = const()[name = string("op_3312_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_3312_to_fp16, x = x_199_cast_fp16)[name = string("out_133_cast_fp16")]; tensor layer_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337226240)))]; tensor out_135_cast_fp16 = mul(x = out_133_cast_fp16, y = layer_layers_11_input_layernorm_weight_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_3318_split_sizes_0 = const()[name = string("op_3318_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3318_axis_0 = const()[name = string("op_3318_axis_0"), val = int32(1)]; tensor var_3318_cast_fp16_0, tensor var_3318_cast_fp16_1 = split(axis = var_3318_axis_0, split_sizes = var_3318_split_sizes_0, x = out_135_cast_fp16)[name = string("op_3318_cast_fp16")]; string query_states_45_pad_type_0 = const()[name = string("query_states_45_pad_type_0"), val = string("valid")]; tensor query_states_45_strides_0 = const()[name = string("query_states_45_strides_0"), val = tensor([1, 1])]; tensor query_states_45_pad_0 = const()[name = string("query_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_45_dilations_0 = const()[name = string("query_states_45_dilations_0"), val = tensor([1, 1])]; int32 query_states_45_groups_0 = const()[name = string("query_states_45_groups_0"), val = int32(1)]; tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337230400)))]; tensor query_states_45_cast_fp16 = conv(dilations = query_states_45_dilations_0, groups = query_states_45_groups_0, pad = query_states_45_pad_0, pad_type = query_states_45_pad_type_0, strides = query_states_45_strides_0, weight = var_3340_to_fp16, x = var_3318_cast_fp16_0)[name = string("query_states_45_cast_fp16")]; string key_states_45_pad_type_0 = const()[name = string("key_states_45_pad_type_0"), val = string("valid")]; tensor key_states_45_strides_0 = const()[name = string("key_states_45_strides_0"), val = tensor([1, 1])]; tensor key_states_45_pad_0 = const()[name = string("key_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_45_dilations_0 = const()[name = string("key_states_45_dilations_0"), val = tensor([1, 1])]; int32 key_states_45_groups_0 = const()[name = string("key_states_45_groups_0"), val = int32(1)]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339327616)))]; tensor key_states_45_cast_fp16 = conv(dilations = key_states_45_dilations_0, groups = key_states_45_groups_0, pad = key_states_45_pad_0, pad_type = key_states_45_pad_type_0, strides = key_states_45_strides_0, weight = var_3351_to_fp16, x = var_3318_cast_fp16_0)[name = string("key_states_45_cast_fp16")]; string value_states_45_pad_type_0 = const()[name = string("value_states_45_pad_type_0"), val = string("valid")]; tensor value_states_45_strides_0 = const()[name = string("value_states_45_strides_0"), val = tensor([1, 1])]; tensor value_states_45_pad_0 = const()[name = string("value_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_45_dilations_0 = const()[name = string("value_states_45_dilations_0"), val = tensor([1, 1])]; int32 value_states_45_groups_0 = const()[name = string("value_states_45_groups_0"), val = int32(1)]; tensor var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339589824)))]; tensor value_states_45_cast_fp16 = conv(dilations = value_states_45_dilations_0, groups = value_states_45_groups_0, pad = value_states_45_pad_0, pad_type = value_states_45_pad_type_0, strides = value_states_45_strides_0, weight = var_3362_to_fp16, x = var_3318_cast_fp16_0)[name = string("value_states_45_cast_fp16")]; tensor var_3370 = const()[name = string("op_3370"), val = tensor([1, 16, 64, 8])]; tensor embed_45_cast_fp16 = reshape(shape = var_3370, x = query_states_45_cast_fp16)[name = string("embed_45_cast_fp16")]; tensor var_3374 = const()[name = string("op_3374"), val = tensor([1, 2, 64, 8])]; tensor var_3375_cast_fp16 = reshape(shape = var_3374, x = key_states_45_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor embed_47_perm_0 = const()[name = string("embed_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 64, 8])]; tensor var_3382_cast_fp16 = reshape(shape = var_3381, x = value_states_45_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor value_states_47_perm_0 = const()[name = string("value_states_47_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3386_cast_fp16 = mul(x = embed_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3386_cast_fp16")]; tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([32, 32])]; int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-2)]; tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = embed_45_cast_fp16)[name = string("op_3387_cast_fp16")]; fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3389_cast_fp16 = mul(x = var_3387_cast_fp16_1, y = const_117_promoted_to_fp16)[name = string("op_3389_cast_fp16")]; int32 var_3391 = const()[name = string("op_3391"), val = int32(-2)]; bool var_3392_interleave_0 = const()[name = string("op_3392_interleave_0"), val = bool(false)]; tensor var_3392_cast_fp16 = concat(axis = var_3391, interleave = var_3392_interleave_0, values = (var_3389_cast_fp16, var_3387_cast_fp16_0))[name = string("op_3392_cast_fp16")]; tensor var_3393_cast_fp16 = mul(x = var_3392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3393_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_3386_cast_fp16, y = var_3393_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor embed_47_cast_fp16 = transpose(perm = embed_47_perm_0, x = var_3375_cast_fp16)[name = string("transpose_38")]; tensor var_3396_cast_fp16 = mul(x = embed_47_cast_fp16, y = cos_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397_split_sizes_0 = const()[name = string("op_3397_split_sizes_0"), val = tensor([32, 32])]; int32 var_3397_axis_0 = const()[name = string("op_3397_axis_0"), val = int32(-1)]; tensor var_3397_cast_fp16_0, tensor var_3397_cast_fp16_1 = split(axis = var_3397_axis_0, split_sizes = var_3397_split_sizes_0, x = embed_47_cast_fp16)[name = string("op_3397_cast_fp16")]; fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3399_cast_fp16 = mul(x = var_3397_cast_fp16_1, y = const_118_promoted_to_fp16)[name = string("op_3399_cast_fp16")]; int32 var_3401 = const()[name = string("op_3401"), val = int32(-1)]; bool var_3402_interleave_0 = const()[name = string("op_3402_interleave_0"), val = bool(false)]; tensor var_3402_cast_fp16 = concat(axis = var_3401, interleave = var_3402_interleave_0, values = (var_3399_cast_fp16, var_3397_cast_fp16_0))[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = mul(x = var_3402_cast_fp16, y = sin_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor key_states_47_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3403_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([11])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([12])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_111, expand_dims_112, position_id, concat_91_values3_0))[name = string("concat_91")]; tensor concat_92_values1_0 = const()[name = string("concat_92_values1_0"), val = tensor([0])]; tensor concat_92_values3_0 = const()[name = string("concat_92_values3_0"), val = tensor([0])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_114, concat_92_values1_0, var_426, concat_92_values3_0))[name = string("concat_92")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = key_states_47_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_236_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_236")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_47_cast_fp16 = transpose(perm = value_states_47_perm_0, x = var_3382_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_91, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_92, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = value_states_47_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_237_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_237")]; tensor var_3446_begin_0 = const()[name = string("op_3446_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3446_end_0 = const()[name = string("op_3446_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3446_end_mask_0 = const()[name = string("op_3446_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3446_cast_fp16 = slice_by_index(begin = var_3446_begin_0, end = var_3446_end_0, end_mask = var_3446_end_mask_0, x = coreml_update_state_70)[name = string("op_3446_cast_fp16")]; tensor tile_22 = const()[name = string("tile_22"), val = tensor([1, 1])]; int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(1)]; tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = tile_22, x = var_3446_cast_fp16)[name = string("op_3449_cast_fp16")]; tensor var_3456_begin_0 = const()[name = string("op_3456_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_3456_end_0 = const()[name = string("op_3456_end_0"), val = tensor([12, 2, 2048, 64])]; tensor var_3456_end_mask_0 = const()[name = string("op_3456_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = coreml_update_state_71)[name = string("op_3456_cast_fp16")]; tensor tile_23 = const()[name = string("tile_23"), val = tensor([1, 1])]; int32 var_3459_axis_0 = const()[name = string("op_3459_axis_0"), val = int32(1)]; tensor var_3459_cast_fp16_0, tensor var_3459_cast_fp16_1 = split(axis = var_3459_axis_0, split_sizes = tile_23, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3462_split_sizes_0 = const()[name = string("op_3462_split_sizes_0"), val = tensor([8, 8])]; int32 var_3462_axis_0 = const()[name = string("op_3462_axis_0"), val = int32(1)]; tensor var_3462_cast_fp16_0, tensor var_3462_cast_fp16_1 = split(axis = var_3462_axis_0, split_sizes = var_3462_split_sizes_0, x = query_states_47_cast_fp16)[name = string("op_3462_cast_fp16")]; bool attn_weights_177_transpose_x_0 = const()[name = string("attn_weights_177_transpose_x_0"), val = bool(false)]; bool attn_weights_177_transpose_y_0 = const()[name = string("attn_weights_177_transpose_y_0"), val = bool(false)]; tensor attn_weights_177_cast_fp16 = matmul(transpose_x = attn_weights_177_transpose_x_0, transpose_y = attn_weights_177_transpose_y_0, x = var_3449_cast_fp16_0, y = var_3462_cast_fp16_0)[name = string("attn_weights_177_cast_fp16")]; fp16 _inversed_attn_weights_179_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_179_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_179_cast_fp16 = mul(x = attn_weights_177_cast_fp16, y = _inversed_attn_weights_179_y_0_to_fp16)[name = string("_inversed_attn_weights_179_cast_fp16")]; tensor attn_weights_181_cast_fp16 = add(x = _inversed_attn_weights_179_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_181_cast_fp16")]; int32 var_3469 = const()[name = string("op_3469"), val = int32(2)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_3469, x = attn_weights_181_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool var_3475_transpose_x_1 = const()[name = string("op_3475_transpose_x_1"), val = bool(true)]; bool var_3475_transpose_y_1 = const()[name = string("op_3475_transpose_y_1"), val = bool(false)]; tensor var_3475_cast_fp16 = matmul(transpose_x = var_3475_transpose_x_1, transpose_y = var_3475_transpose_y_1, x = attn_weights_183_cast_fp16, y = var_3459_cast_fp16_0)[name = string("op_3475_cast_fp16")]; bool attn_weights_185_transpose_x_0 = const()[name = string("attn_weights_185_transpose_x_0"), val = bool(false)]; bool attn_weights_185_transpose_y_0 = const()[name = string("attn_weights_185_transpose_y_0"), val = bool(false)]; tensor attn_weights_185_cast_fp16 = matmul(transpose_x = attn_weights_185_transpose_x_0, transpose_y = attn_weights_185_transpose_y_0, x = var_3449_cast_fp16_1, y = var_3462_cast_fp16_1)[name = string("attn_weights_185_cast_fp16")]; fp16 _inversed_attn_weights_187_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_187_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_187_cast_fp16 = mul(x = attn_weights_185_cast_fp16, y = _inversed_attn_weights_187_y_0_to_fp16)[name = string("_inversed_attn_weights_187_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = _inversed_attn_weights_187_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_189_cast_fp16")]; int32 var_3481 = const()[name = string("op_3481"), val = int32(2)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_3481, x = attn_weights_189_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_67_transpose_x_1 = const()[name = string("attn_output_67_transpose_x_1"), val = bool(true)]; bool attn_output_67_transpose_y_1 = const()[name = string("attn_output_67_transpose_y_1"), val = bool(false)]; tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_1, transpose_y = attn_output_67_transpose_y_1, x = attn_weights_191_cast_fp16, y = var_3459_cast_fp16_1)[name = string("attn_output_67_cast_fp16")]; int32 var_3489 = const()[name = string("op_3489"), val = int32(1)]; bool attn_output_69_interleave_0 = const()[name = string("attn_output_69_interleave_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = concat(axis = var_3489, interleave = attn_output_69_interleave_0, values = (var_3475_cast_fp16, attn_output_67_cast_fp16))[name = string("attn_output_69_cast_fp16")]; tensor var_3493_perm_0 = const()[name = string("op_3493_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, 1024, 1, 8])]; tensor var_3493_cast_fp16 = transpose(perm = var_3493_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_36")]; tensor x_203_cast_fp16 = reshape(shape = var_3498, x = var_3493_cast_fp16)[name = string("x_203_cast_fp16")]; string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339852032)))]; tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = var_3505_to_fp16, x = x_203_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor x_205_cast_fp16 = add(x = x_197_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("x_205_cast_fp16")]; int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; fp16 const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3520_cast_fp16 = mul(x = x_205_cast_fp16, y = const_123_promoted_to_fp16)[name = string("op_3520_cast_fp16")]; bool x_207_interleave_0 = const()[name = string("x_207_interleave_0"), val = bool(false)]; tensor x_207_cast_fp16 = concat(axis = var_3517, interleave = x_207_interleave_0, values = (x_205_cast_fp16, var_3520_cast_fp16))[name = string("x_207_cast_fp16")]; tensor out_139_axes_0 = const()[name = string("out_139_axes_0"), val = tensor([1])]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_3530_to_fp16, x = x_207_cast_fp16)[name = string("out_139_cast_fp16")]; tensor layer_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341949248)))]; tensor out_141_cast_fp16 = mul(x = out_139_cast_fp16, y = layer_layers_11_post_attention_layernorm_weight_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_3536_split_sizes_0 = const()[name = string("op_3536_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3536_axis_0 = const()[name = string("op_3536_axis_0"), val = int32(1)]; tensor var_3536_cast_fp16_0, tensor var_3536_cast_fp16_1 = split(axis = var_3536_axis_0, split_sizes = var_3536_split_sizes_0, x = out_141_cast_fp16)[name = string("op_3536_cast_fp16")]; string input_23_pad_type_0 = const()[name = string("input_23_pad_type_0"), val = string("valid")]; tensor input_23_strides_0 = const()[name = string("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = string("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = string("input_23_dilations_0"), val = tensor([1, 1])]; int32 input_23_groups_0 = const()[name = string("input_23_groups_0"), val = int32(1)]; tensor var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341953408)))]; tensor input_23_cast_fp16 = conv(dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = var_3541_to_fp16, x = var_3536_cast_fp16_0)[name = string("input_23_cast_fp16")]; tensor var_3552_cast_fp16 = silu(x = input_23_cast_fp16)[name = string("op_3552_cast_fp16")]; string var_3557_pad_type_0 = const()[name = string("op_3557_pad_type_0"), val = string("valid")]; tensor var_3557_strides_0 = const()[name = string("op_3557_strides_0"), val = tensor([1, 1])]; tensor var_3557_pad_0 = const()[name = string("op_3557_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_dilations_0 = const()[name = string("op_3557_dilations_0"), val = tensor([1, 1])]; int32 var_3557_groups_0 = const()[name = string("op_3557_groups_0"), val = int32(1)]; tensor var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350342080)))]; tensor var_3557_cast_fp16 = conv(dilations = var_3557_dilations_0, groups = var_3557_groups_0, pad = var_3557_pad_0, pad_type = var_3557_pad_type_0, strides = var_3557_strides_0, weight = var_3540_to_fp16, x = var_3536_cast_fp16_0)[name = string("op_3557_cast_fp16")]; tensor x_213_cast_fp16 = mul(x = var_3552_cast_fp16, y = var_3557_cast_fp16)[name = string("x_213_cast_fp16")]; string hidden_states_71_pad_type_0 = const()[name = string("hidden_states_71_pad_type_0"), val = string("valid")]; tensor hidden_states_71_strides_0 = const()[name = string("hidden_states_71_strides_0"), val = tensor([1, 1])]; tensor hidden_states_71_pad_0 = const()[name = string("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_71_dilations_0 = const()[name = string("hidden_states_71_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_71_groups_0 = const()[name = string("hidden_states_71_groups_0"), val = int32(1)]; tensor var_3539_to_fp16 = const()[name = string("op_3539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358730752)))]; tensor hidden_states_71_cast_fp16 = conv(dilations = hidden_states_71_dilations_0, groups = hidden_states_71_groups_0, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = hidden_states_71_strides_0, weight = var_3539_to_fp16, x = x_213_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor x_215_cast_fp16 = add(x = x_205_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("x_215_cast_fp16")]; int32 var_3570 = const()[name = string("op_3570"), val = int32(1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3573_cast_fp16 = mul(x = x_215_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_3573_cast_fp16")]; bool x_217_interleave_0 = const()[name = string("x_217_interleave_0"), val = bool(false)]; tensor x_217_cast_fp16 = concat(axis = var_3570, interleave = x_217_interleave_0, values = (x_215_cast_fp16, var_3573_cast_fp16))[name = string("x_217_cast_fp16")]; tensor out_145_axes_0 = const()[name = string("out_145_axes_0"), val = tensor([1])]; fp16 var_3583_to_fp16 = const()[name = string("op_3583_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_145_cast_fp16 = layer_norm(axes = out_145_axes_0, epsilon = var_3583_to_fp16, x = x_217_cast_fp16)[name = string("out_145_cast_fp16")]; tensor layer_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367119424)))]; tensor out_147_cast_fp16 = mul(x = out_145_cast_fp16, y = layer_layers_12_input_layernorm_weight_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_3589_split_sizes_0 = const()[name = string("op_3589_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3589_axis_0 = const()[name = string("op_3589_axis_0"), val = int32(1)]; tensor var_3589_cast_fp16_0, tensor var_3589_cast_fp16_1 = split(axis = var_3589_axis_0, split_sizes = var_3589_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3589_cast_fp16")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor var_3611_to_fp16 = const()[name = string("op_3611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367123584)))]; tensor query_states_49_cast_fp16 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = var_3611_to_fp16, x = var_3589_cast_fp16_0)[name = string("query_states_49_cast_fp16")]; string key_states_49_pad_type_0 = const()[name = string("key_states_49_pad_type_0"), val = string("valid")]; tensor key_states_49_strides_0 = const()[name = string("key_states_49_strides_0"), val = tensor([1, 1])]; tensor key_states_49_pad_0 = const()[name = string("key_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_49_dilations_0 = const()[name = string("key_states_49_dilations_0"), val = tensor([1, 1])]; int32 key_states_49_groups_0 = const()[name = string("key_states_49_groups_0"), val = int32(1)]; tensor var_3622_to_fp16 = const()[name = string("op_3622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369220800)))]; tensor key_states_49_cast_fp16 = conv(dilations = key_states_49_dilations_0, groups = key_states_49_groups_0, pad = key_states_49_pad_0, pad_type = key_states_49_pad_type_0, strides = key_states_49_strides_0, weight = var_3622_to_fp16, x = var_3589_cast_fp16_0)[name = string("key_states_49_cast_fp16")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor var_3633_to_fp16 = const()[name = string("op_3633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369483008)))]; tensor value_states_49_cast_fp16 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = var_3633_to_fp16, x = var_3589_cast_fp16_0)[name = string("value_states_49_cast_fp16")]; tensor var_3641 = const()[name = string("op_3641"), val = tensor([1, 16, 64, 8])]; tensor embed_49_cast_fp16 = reshape(shape = var_3641, x = query_states_49_cast_fp16)[name = string("embed_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 2, 64, 8])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = key_states_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor embed_51_perm_0 = const()[name = string("embed_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3652 = const()[name = string("op_3652"), val = tensor([1, 2, 64, 8])]; tensor var_3653_cast_fp16 = reshape(shape = var_3652, x = value_states_49_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor value_states_51_perm_0 = const()[name = string("value_states_51_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3657_cast_fp16 = mul(x = embed_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3657_cast_fp16")]; tensor var_3658_split_sizes_0 = const()[name = string("op_3658_split_sizes_0"), val = tensor([32, 32])]; int32 var_3658_axis_0 = const()[name = string("op_3658_axis_0"), val = int32(-2)]; tensor var_3658_cast_fp16_0, tensor var_3658_cast_fp16_1 = split(axis = var_3658_axis_0, split_sizes = var_3658_split_sizes_0, x = embed_49_cast_fp16)[name = string("op_3658_cast_fp16")]; fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3660_cast_fp16 = mul(x = var_3658_cast_fp16_1, y = const_127_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; int32 var_3662 = const()[name = string("op_3662"), val = int32(-2)]; bool var_3663_interleave_0 = const()[name = string("op_3663_interleave_0"), val = bool(false)]; tensor var_3663_cast_fp16 = concat(axis = var_3662, interleave = var_3663_interleave_0, values = (var_3660_cast_fp16, var_3658_cast_fp16_0))[name = string("op_3663_cast_fp16")]; tensor var_3664_cast_fp16 = mul(x = var_3663_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_3657_cast_fp16, y = var_3664_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor embed_51_cast_fp16 = transpose(perm = embed_51_perm_0, x = var_3646_cast_fp16)[name = string("transpose_35")]; tensor var_3667_cast_fp16 = mul(x = embed_51_cast_fp16, y = cos_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor var_3668_split_sizes_0 = const()[name = string("op_3668_split_sizes_0"), val = tensor([32, 32])]; int32 var_3668_axis_0 = const()[name = string("op_3668_axis_0"), val = int32(-1)]; tensor var_3668_cast_fp16_0, tensor var_3668_cast_fp16_1 = split(axis = var_3668_axis_0, split_sizes = var_3668_split_sizes_0, x = embed_51_cast_fp16)[name = string("op_3668_cast_fp16")]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3670_cast_fp16 = mul(x = var_3668_cast_fp16_1, y = const_128_promoted_to_fp16)[name = string("op_3670_cast_fp16")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; bool var_3673_interleave_0 = const()[name = string("op_3673_interleave_0"), val = bool(false)]; tensor var_3673_cast_fp16 = concat(axis = var_3672, interleave = var_3673_interleave_0, values = (var_3670_cast_fp16, var_3668_cast_fp16_0))[name = string("op_3673_cast_fp16")]; tensor var_3674_cast_fp16 = mul(x = var_3673_cast_fp16, y = sin_cast_fp16)[name = string("op_3674_cast_fp16")]; tensor key_states_51_cast_fp16 = add(x = var_3667_cast_fp16, y = var_3674_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([12])]; tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([13])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_121, expand_dims_122, position_id, concat_99_values3_0))[name = string("concat_99")]; tensor concat_100_values1_0 = const()[name = string("concat_100_values1_0"), val = tensor([0])]; tensor concat_100_values3_0 = const()[name = string("concat_100_values3_0"), val = tensor([0])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (expand_dims_124, concat_100_values1_0, var_426, concat_100_values3_0))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = key_states_51_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_238_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_238")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51_cast_fp16 = transpose(perm = value_states_51_perm_0, x = var_3653_cast_fp16)[name = string("transpose_34")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_99, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_100, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = value_states_51_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_239_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_239")]; tensor var_3717_begin_0 = const()[name = string("op_3717_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3717_end_0 = const()[name = string("op_3717_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3717_end_mask_0 = const()[name = string("op_3717_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = coreml_update_state_72)[name = string("op_3717_cast_fp16")]; tensor tile_24 = const()[name = string("tile_24"), val = tensor([1, 1])]; int32 var_3720_axis_0 = const()[name = string("op_3720_axis_0"), val = int32(1)]; tensor var_3720_cast_fp16_0, tensor var_3720_cast_fp16_1 = split(axis = var_3720_axis_0, split_sizes = tile_24, x = var_3717_cast_fp16)[name = string("op_3720_cast_fp16")]; tensor var_3727_begin_0 = const()[name = string("op_3727_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_3727_end_0 = const()[name = string("op_3727_end_0"), val = tensor([13, 2, 2048, 64])]; tensor var_3727_end_mask_0 = const()[name = string("op_3727_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = coreml_update_state_73)[name = string("op_3727_cast_fp16")]; tensor tile_25 = const()[name = string("tile_25"), val = tensor([1, 1])]; int32 var_3730_axis_0 = const()[name = string("op_3730_axis_0"), val = int32(1)]; tensor var_3730_cast_fp16_0, tensor var_3730_cast_fp16_1 = split(axis = var_3730_axis_0, split_sizes = tile_25, x = var_3727_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor var_3733_split_sizes_0 = const()[name = string("op_3733_split_sizes_0"), val = tensor([8, 8])]; int32 var_3733_axis_0 = const()[name = string("op_3733_axis_0"), val = int32(1)]; tensor var_3733_cast_fp16_0, tensor var_3733_cast_fp16_1 = split(axis = var_3733_axis_0, split_sizes = var_3733_split_sizes_0, x = query_states_51_cast_fp16)[name = string("op_3733_cast_fp16")]; bool attn_weights_193_transpose_x_0 = const()[name = string("attn_weights_193_transpose_x_0"), val = bool(false)]; bool attn_weights_193_transpose_y_0 = const()[name = string("attn_weights_193_transpose_y_0"), val = bool(false)]; tensor attn_weights_193_cast_fp16 = matmul(transpose_x = attn_weights_193_transpose_x_0, transpose_y = attn_weights_193_transpose_y_0, x = var_3720_cast_fp16_0, y = var_3733_cast_fp16_0)[name = string("attn_weights_193_cast_fp16")]; fp16 _inversed_attn_weights_195_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_195_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_195_cast_fp16 = mul(x = attn_weights_193_cast_fp16, y = _inversed_attn_weights_195_y_0_to_fp16)[name = string("_inversed_attn_weights_195_cast_fp16")]; tensor attn_weights_197_cast_fp16 = add(x = _inversed_attn_weights_195_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_197_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(2)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_3740, x = attn_weights_197_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool var_3746_transpose_x_1 = const()[name = string("op_3746_transpose_x_1"), val = bool(true)]; bool var_3746_transpose_y_1 = const()[name = string("op_3746_transpose_y_1"), val = bool(false)]; tensor var_3746_cast_fp16 = matmul(transpose_x = var_3746_transpose_x_1, transpose_y = var_3746_transpose_y_1, x = attn_weights_199_cast_fp16, y = var_3730_cast_fp16_0)[name = string("op_3746_cast_fp16")]; bool attn_weights_201_transpose_x_0 = const()[name = string("attn_weights_201_transpose_x_0"), val = bool(false)]; bool attn_weights_201_transpose_y_0 = const()[name = string("attn_weights_201_transpose_y_0"), val = bool(false)]; tensor attn_weights_201_cast_fp16 = matmul(transpose_x = attn_weights_201_transpose_x_0, transpose_y = attn_weights_201_transpose_y_0, x = var_3720_cast_fp16_1, y = var_3733_cast_fp16_1)[name = string("attn_weights_201_cast_fp16")]; fp16 _inversed_attn_weights_203_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_203_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_203_cast_fp16 = mul(x = attn_weights_201_cast_fp16, y = _inversed_attn_weights_203_y_0_to_fp16)[name = string("_inversed_attn_weights_203_cast_fp16")]; tensor attn_weights_205_cast_fp16 = add(x = _inversed_attn_weights_203_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_205_cast_fp16")]; int32 var_3752 = const()[name = string("op_3752"), val = int32(2)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_3752, x = attn_weights_205_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(true)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_207_cast_fp16, y = var_3730_cast_fp16_1)[name = string("attn_output_73_cast_fp16")]; int32 var_3760 = const()[name = string("op_3760"), val = int32(1)]; bool attn_output_75_interleave_0 = const()[name = string("attn_output_75_interleave_0"), val = bool(false)]; tensor attn_output_75_cast_fp16 = concat(axis = var_3760, interleave = attn_output_75_interleave_0, values = (var_3746_cast_fp16, attn_output_73_cast_fp16))[name = string("attn_output_75_cast_fp16")]; tensor var_3764_perm_0 = const()[name = string("op_3764_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([1, 1024, 1, 8])]; tensor var_3764_cast_fp16 = transpose(perm = var_3764_perm_0, x = attn_output_75_cast_fp16)[name = string("transpose_33")]; tensor x_221_cast_fp16 = reshape(shape = var_3769, x = var_3764_cast_fp16)[name = string("x_221_cast_fp16")]; string hidden_states_75_pad_type_0 = const()[name = string("hidden_states_75_pad_type_0"), val = string("valid")]; tensor hidden_states_75_strides_0 = const()[name = string("hidden_states_75_strides_0"), val = tensor([1, 1])]; tensor hidden_states_75_pad_0 = const()[name = string("hidden_states_75_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_75_dilations_0 = const()[name = string("hidden_states_75_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_75_groups_0 = const()[name = string("hidden_states_75_groups_0"), val = int32(1)]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369745216)))]; tensor hidden_states_75_cast_fp16 = conv(dilations = hidden_states_75_dilations_0, groups = hidden_states_75_groups_0, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = hidden_states_75_strides_0, weight = var_3776_to_fp16, x = x_221_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor x_223_cast_fp16 = add(x = x_215_cast_fp16, y = hidden_states_75_cast_fp16)[name = string("x_223_cast_fp16")]; int32 var_3788 = const()[name = string("op_3788"), val = int32(1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3791_cast_fp16 = mul(x = x_223_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3791_cast_fp16")]; bool x_225_interleave_0 = const()[name = string("x_225_interleave_0"), val = bool(false)]; tensor x_225_cast_fp16 = concat(axis = var_3788, interleave = x_225_interleave_0, values = (x_223_cast_fp16, var_3791_cast_fp16))[name = string("x_225_cast_fp16")]; tensor out_151_axes_0 = const()[name = string("out_151_axes_0"), val = tensor([1])]; fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_151_cast_fp16 = layer_norm(axes = out_151_axes_0, epsilon = var_3801_to_fp16, x = x_225_cast_fp16)[name = string("out_151_cast_fp16")]; tensor layer_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371842432)))]; tensor out_153_cast_fp16 = mul(x = out_151_cast_fp16, y = layer_layers_12_post_attention_layernorm_weight_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3807_split_sizes_0 = const()[name = string("op_3807_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3807_axis_0 = const()[name = string("op_3807_axis_0"), val = int32(1)]; tensor var_3807_cast_fp16_0, tensor var_3807_cast_fp16_1 = split(axis = var_3807_axis_0, split_sizes = var_3807_split_sizes_0, x = out_153_cast_fp16)[name = string("op_3807_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371846592)))]; tensor input_25_cast_fp16 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = var_3812_to_fp16, x = var_3807_cast_fp16_0)[name = string("input_25_cast_fp16")]; tensor var_3823_cast_fp16 = silu(x = input_25_cast_fp16)[name = string("op_3823_cast_fp16")]; string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")]; tensor var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor([1, 1])]; tensor var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor([1, 1])]; int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)]; tensor var_3811_to_fp16 = const()[name = string("op_3811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380235264)))]; tensor var_3828_cast_fp16 = conv(dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = var_3811_to_fp16, x = var_3807_cast_fp16_0)[name = string("op_3828_cast_fp16")]; tensor x_231_cast_fp16 = mul(x = var_3823_cast_fp16, y = var_3828_cast_fp16)[name = string("x_231_cast_fp16")]; string hidden_states_77_pad_type_0 = const()[name = string("hidden_states_77_pad_type_0"), val = string("valid")]; tensor hidden_states_77_strides_0 = const()[name = string("hidden_states_77_strides_0"), val = tensor([1, 1])]; tensor hidden_states_77_pad_0 = const()[name = string("hidden_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_77_dilations_0 = const()[name = string("hidden_states_77_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_77_groups_0 = const()[name = string("hidden_states_77_groups_0"), val = int32(1)]; tensor var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388623936)))]; tensor hidden_states_77_cast_fp16 = conv(dilations = hidden_states_77_dilations_0, groups = hidden_states_77_groups_0, pad = hidden_states_77_pad_0, pad_type = hidden_states_77_pad_type_0, strides = hidden_states_77_strides_0, weight = var_3810_to_fp16, x = x_231_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_223_cast_fp16, y = hidden_states_77_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_3841 = const()[name = string("op_3841"), val = int32(1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x_233_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool x_235_interleave_0 = const()[name = string("x_235_interleave_0"), val = bool(false)]; tensor x_235_cast_fp16 = concat(axis = var_3841, interleave = x_235_interleave_0, values = (x_233_cast_fp16, var_3844_cast_fp16))[name = string("x_235_cast_fp16")]; tensor out_157_axes_0 = const()[name = string("out_157_axes_0"), val = tensor([1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_157_cast_fp16 = layer_norm(axes = out_157_axes_0, epsilon = var_3854_to_fp16, x = x_235_cast_fp16)[name = string("out_157_cast_fp16")]; tensor layer_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397012608)))]; tensor out_159_cast_fp16 = mul(x = out_157_cast_fp16, y = layer_layers_13_input_layernorm_weight_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_3860_split_sizes_0 = const()[name = string("op_3860_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3860_axis_0 = const()[name = string("op_3860_axis_0"), val = int32(1)]; tensor var_3860_cast_fp16_0, tensor var_3860_cast_fp16_1 = split(axis = var_3860_axis_0, split_sizes = var_3860_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3860_cast_fp16")]; string query_states_53_pad_type_0 = const()[name = string("query_states_53_pad_type_0"), val = string("valid")]; tensor query_states_53_strides_0 = const()[name = string("query_states_53_strides_0"), val = tensor([1, 1])]; tensor query_states_53_pad_0 = const()[name = string("query_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_53_dilations_0 = const()[name = string("query_states_53_dilations_0"), val = tensor([1, 1])]; int32 query_states_53_groups_0 = const()[name = string("query_states_53_groups_0"), val = int32(1)]; tensor var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397016768)))]; tensor query_states_53_cast_fp16 = conv(dilations = query_states_53_dilations_0, groups = query_states_53_groups_0, pad = query_states_53_pad_0, pad_type = query_states_53_pad_type_0, strides = query_states_53_strides_0, weight = var_3882_to_fp16, x = var_3860_cast_fp16_0)[name = string("query_states_53_cast_fp16")]; string key_states_53_pad_type_0 = const()[name = string("key_states_53_pad_type_0"), val = string("valid")]; tensor key_states_53_strides_0 = const()[name = string("key_states_53_strides_0"), val = tensor([1, 1])]; tensor key_states_53_pad_0 = const()[name = string("key_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_53_dilations_0 = const()[name = string("key_states_53_dilations_0"), val = tensor([1, 1])]; int32 key_states_53_groups_0 = const()[name = string("key_states_53_groups_0"), val = int32(1)]; tensor var_3893_to_fp16 = const()[name = string("op_3893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399113984)))]; tensor key_states_53_cast_fp16 = conv(dilations = key_states_53_dilations_0, groups = key_states_53_groups_0, pad = key_states_53_pad_0, pad_type = key_states_53_pad_type_0, strides = key_states_53_strides_0, weight = var_3893_to_fp16, x = var_3860_cast_fp16_0)[name = string("key_states_53_cast_fp16")]; string value_states_53_pad_type_0 = const()[name = string("value_states_53_pad_type_0"), val = string("valid")]; tensor value_states_53_strides_0 = const()[name = string("value_states_53_strides_0"), val = tensor([1, 1])]; tensor value_states_53_pad_0 = const()[name = string("value_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_53_dilations_0 = const()[name = string("value_states_53_dilations_0"), val = tensor([1, 1])]; int32 value_states_53_groups_0 = const()[name = string("value_states_53_groups_0"), val = int32(1)]; tensor var_3904_to_fp16 = const()[name = string("op_3904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399376192)))]; tensor value_states_53_cast_fp16 = conv(dilations = value_states_53_dilations_0, groups = value_states_53_groups_0, pad = value_states_53_pad_0, pad_type = value_states_53_pad_type_0, strides = value_states_53_strides_0, weight = var_3904_to_fp16, x = var_3860_cast_fp16_0)[name = string("value_states_53_cast_fp16")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([1, 16, 64, 8])]; tensor embed_53_cast_fp16 = reshape(shape = var_3912, x = query_states_53_cast_fp16)[name = string("embed_53_cast_fp16")]; tensor var_3916 = const()[name = string("op_3916"), val = tensor([1, 2, 64, 8])]; tensor var_3917_cast_fp16 = reshape(shape = var_3916, x = key_states_53_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor embed_55_perm_0 = const()[name = string("embed_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2, 64, 8])]; tensor var_3924_cast_fp16 = reshape(shape = var_3923, x = value_states_53_cast_fp16)[name = string("op_3924_cast_fp16")]; tensor value_states_55_perm_0 = const()[name = string("value_states_55_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3928_cast_fp16 = mul(x = embed_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor var_3929_split_sizes_0 = const()[name = string("op_3929_split_sizes_0"), val = tensor([32, 32])]; int32 var_3929_axis_0 = const()[name = string("op_3929_axis_0"), val = int32(-2)]; tensor var_3929_cast_fp16_0, tensor var_3929_cast_fp16_1 = split(axis = var_3929_axis_0, split_sizes = var_3929_split_sizes_0, x = embed_53_cast_fp16)[name = string("op_3929_cast_fp16")]; fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3931_cast_fp16 = mul(x = var_3929_cast_fp16_1, y = const_137_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; int32 var_3933 = const()[name = string("op_3933"), val = int32(-2)]; bool var_3934_interleave_0 = const()[name = string("op_3934_interleave_0"), val = bool(false)]; tensor var_3934_cast_fp16 = concat(axis = var_3933, interleave = var_3934_interleave_0, values = (var_3931_cast_fp16, var_3929_cast_fp16_0))[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = var_3934_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_3928_cast_fp16, y = var_3935_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor embed_55_cast_fp16 = transpose(perm = embed_55_perm_0, x = var_3917_cast_fp16)[name = string("transpose_32")]; tensor var_3938_cast_fp16 = mul(x = embed_55_cast_fp16, y = cos_cast_fp16)[name = string("op_3938_cast_fp16")]; tensor var_3939_split_sizes_0 = const()[name = string("op_3939_split_sizes_0"), val = tensor([32, 32])]; int32 var_3939_axis_0 = const()[name = string("op_3939_axis_0"), val = int32(-1)]; tensor var_3939_cast_fp16_0, tensor var_3939_cast_fp16_1 = split(axis = var_3939_axis_0, split_sizes = var_3939_split_sizes_0, x = embed_55_cast_fp16)[name = string("op_3939_cast_fp16")]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3941_cast_fp16 = mul(x = var_3939_cast_fp16_1, y = const_138_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; bool var_3944_interleave_0 = const()[name = string("op_3944_interleave_0"), val = bool(false)]; tensor var_3944_cast_fp16 = concat(axis = var_3943, interleave = var_3944_interleave_0, values = (var_3941_cast_fp16, var_3939_cast_fp16_0))[name = string("op_3944_cast_fp16")]; tensor var_3945_cast_fp16 = mul(x = var_3944_cast_fp16, y = sin_cast_fp16)[name = string("op_3945_cast_fp16")]; tensor key_states_55_cast_fp16 = add(x = var_3938_cast_fp16, y = var_3945_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor expand_dims_131 = const()[name = string("expand_dims_131"), val = tensor([13])]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_134 = const()[name = string("expand_dims_134"), val = tensor([14])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_131, expand_dims_132, position_id, concat_107_values3_0))[name = string("concat_107")]; tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (expand_dims_134, concat_108_values1_0, var_426, concat_108_values3_0))[name = string("concat_108")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = key_states_55_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_240_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_240")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_55_cast_fp16 = transpose(perm = value_states_55_perm_0, x = var_3924_cast_fp16)[name = string("transpose_31")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_107, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_108, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = value_states_55_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_241_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_241")]; tensor var_3988_begin_0 = const()[name = string("op_3988_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3988_end_0 = const()[name = string("op_3988_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3988_end_mask_0 = const()[name = string("op_3988_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = coreml_update_state_74)[name = string("op_3988_cast_fp16")]; tensor tile_26 = const()[name = string("tile_26"), val = tensor([1, 1])]; int32 var_3991_axis_0 = const()[name = string("op_3991_axis_0"), val = int32(1)]; tensor var_3991_cast_fp16_0, tensor var_3991_cast_fp16_1 = split(axis = var_3991_axis_0, split_sizes = tile_26, x = var_3988_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor var_3998_begin_0 = const()[name = string("op_3998_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_3998_end_0 = const()[name = string("op_3998_end_0"), val = tensor([14, 2, 2048, 64])]; tensor var_3998_end_mask_0 = const()[name = string("op_3998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3998_cast_fp16 = slice_by_index(begin = var_3998_begin_0, end = var_3998_end_0, end_mask = var_3998_end_mask_0, x = coreml_update_state_75)[name = string("op_3998_cast_fp16")]; tensor tile_27 = const()[name = string("tile_27"), val = tensor([1, 1])]; int32 var_4001_axis_0 = const()[name = string("op_4001_axis_0"), val = int32(1)]; tensor var_4001_cast_fp16_0, tensor var_4001_cast_fp16_1 = split(axis = var_4001_axis_0, split_sizes = tile_27, x = var_3998_cast_fp16)[name = string("op_4001_cast_fp16")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([8, 8])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(1)]; tensor var_4004_cast_fp16_0, tensor var_4004_cast_fp16_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = query_states_55_cast_fp16)[name = string("op_4004_cast_fp16")]; bool attn_weights_209_transpose_x_0 = const()[name = string("attn_weights_209_transpose_x_0"), val = bool(false)]; bool attn_weights_209_transpose_y_0 = const()[name = string("attn_weights_209_transpose_y_0"), val = bool(false)]; tensor attn_weights_209_cast_fp16 = matmul(transpose_x = attn_weights_209_transpose_x_0, transpose_y = attn_weights_209_transpose_y_0, x = var_3991_cast_fp16_0, y = var_4004_cast_fp16_0)[name = string("attn_weights_209_cast_fp16")]; fp16 _inversed_attn_weights_211_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_211_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_211_cast_fp16 = mul(x = attn_weights_209_cast_fp16, y = _inversed_attn_weights_211_y_0_to_fp16)[name = string("_inversed_attn_weights_211_cast_fp16")]; tensor attn_weights_213_cast_fp16 = add(x = _inversed_attn_weights_211_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_213_cast_fp16")]; int32 var_4011 = const()[name = string("op_4011"), val = int32(2)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_4011, x = attn_weights_213_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool var_4017_transpose_x_1 = const()[name = string("op_4017_transpose_x_1"), val = bool(true)]; bool var_4017_transpose_y_1 = const()[name = string("op_4017_transpose_y_1"), val = bool(false)]; tensor var_4017_cast_fp16 = matmul(transpose_x = var_4017_transpose_x_1, transpose_y = var_4017_transpose_y_1, x = attn_weights_215_cast_fp16, y = var_4001_cast_fp16_0)[name = string("op_4017_cast_fp16")]; bool attn_weights_217_transpose_x_0 = const()[name = string("attn_weights_217_transpose_x_0"), val = bool(false)]; bool attn_weights_217_transpose_y_0 = const()[name = string("attn_weights_217_transpose_y_0"), val = bool(false)]; tensor attn_weights_217_cast_fp16 = matmul(transpose_x = attn_weights_217_transpose_x_0, transpose_y = attn_weights_217_transpose_y_0, x = var_3991_cast_fp16_1, y = var_4004_cast_fp16_1)[name = string("attn_weights_217_cast_fp16")]; fp16 _inversed_attn_weights_219_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_219_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_219_cast_fp16 = mul(x = attn_weights_217_cast_fp16, y = _inversed_attn_weights_219_y_0_to_fp16)[name = string("_inversed_attn_weights_219_cast_fp16")]; tensor attn_weights_221_cast_fp16 = add(x = _inversed_attn_weights_219_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_221_cast_fp16")]; int32 var_4023 = const()[name = string("op_4023"), val = int32(2)]; tensor attn_weights_223_cast_fp16 = softmax(axis = var_4023, x = attn_weights_221_cast_fp16)[name = string("attn_weights_223_cast_fp16")]; bool attn_output_79_transpose_x_1 = const()[name = string("attn_output_79_transpose_x_1"), val = bool(true)]; bool attn_output_79_transpose_y_1 = const()[name = string("attn_output_79_transpose_y_1"), val = bool(false)]; tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_1, transpose_y = attn_output_79_transpose_y_1, x = attn_weights_223_cast_fp16, y = var_4001_cast_fp16_1)[name = string("attn_output_79_cast_fp16")]; int32 var_4031 = const()[name = string("op_4031"), val = int32(1)]; bool attn_output_81_interleave_0 = const()[name = string("attn_output_81_interleave_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = concat(axis = var_4031, interleave = attn_output_81_interleave_0, values = (var_4017_cast_fp16, attn_output_79_cast_fp16))[name = string("attn_output_81_cast_fp16")]; tensor var_4035_perm_0 = const()[name = string("op_4035_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 1024, 1, 8])]; tensor var_4035_cast_fp16 = transpose(perm = var_4035_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_30")]; tensor x_239_cast_fp16 = reshape(shape = var_4040, x = var_4035_cast_fp16)[name = string("x_239_cast_fp16")]; string hidden_states_81_pad_type_0 = const()[name = string("hidden_states_81_pad_type_0"), val = string("valid")]; tensor hidden_states_81_strides_0 = const()[name = string("hidden_states_81_strides_0"), val = tensor([1, 1])]; tensor hidden_states_81_pad_0 = const()[name = string("hidden_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_81_dilations_0 = const()[name = string("hidden_states_81_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_81_groups_0 = const()[name = string("hidden_states_81_groups_0"), val = int32(1)]; tensor var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399638400)))]; tensor hidden_states_81_cast_fp16 = conv(dilations = hidden_states_81_dilations_0, groups = hidden_states_81_groups_0, pad = hidden_states_81_pad_0, pad_type = hidden_states_81_pad_type_0, strides = hidden_states_81_strides_0, weight = var_4047_to_fp16, x = x_239_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_81_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(1)]; fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4062_cast_fp16 = mul(x = x_241_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; bool x_243_interleave_0 = const()[name = string("x_243_interleave_0"), val = bool(false)]; tensor x_243_cast_fp16 = concat(axis = var_4059, interleave = x_243_interleave_0, values = (x_241_cast_fp16, var_4062_cast_fp16))[name = string("x_243_cast_fp16")]; tensor out_163_axes_0 = const()[name = string("out_163_axes_0"), val = tensor([1])]; fp16 var_4072_to_fp16 = const()[name = string("op_4072_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_163_cast_fp16 = layer_norm(axes = out_163_axes_0, epsilon = var_4072_to_fp16, x = x_243_cast_fp16)[name = string("out_163_cast_fp16")]; tensor layer_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401735616)))]; tensor out_165_cast_fp16 = mul(x = out_163_cast_fp16, y = layer_layers_13_post_attention_layernorm_weight_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_4078_split_sizes_0 = const()[name = string("op_4078_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4078_axis_0 = const()[name = string("op_4078_axis_0"), val = int32(1)]; tensor var_4078_cast_fp16_0, tensor var_4078_cast_fp16_1 = split(axis = var_4078_axis_0, split_sizes = var_4078_split_sizes_0, x = out_165_cast_fp16)[name = string("op_4078_cast_fp16")]; string input_27_pad_type_0 = const()[name = string("input_27_pad_type_0"), val = string("valid")]; tensor input_27_strides_0 = const()[name = string("input_27_strides_0"), val = tensor([1, 1])]; tensor input_27_pad_0 = const()[name = string("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_27_dilations_0 = const()[name = string("input_27_dilations_0"), val = tensor([1, 1])]; int32 input_27_groups_0 = const()[name = string("input_27_groups_0"), val = int32(1)]; tensor var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401739776)))]; tensor input_27_cast_fp16 = conv(dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = var_4083_to_fp16, x = var_4078_cast_fp16_0)[name = string("input_27_cast_fp16")]; tensor var_4094_cast_fp16 = silu(x = input_27_cast_fp16)[name = string("op_4094_cast_fp16")]; string var_4099_pad_type_0 = const()[name = string("op_4099_pad_type_0"), val = string("valid")]; tensor var_4099_strides_0 = const()[name = string("op_4099_strides_0"), val = tensor([1, 1])]; tensor var_4099_pad_0 = const()[name = string("op_4099_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4099_dilations_0 = const()[name = string("op_4099_dilations_0"), val = tensor([1, 1])]; int32 var_4099_groups_0 = const()[name = string("op_4099_groups_0"), val = int32(1)]; tensor var_4082_to_fp16 = const()[name = string("op_4082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410128448)))]; tensor var_4099_cast_fp16 = conv(dilations = var_4099_dilations_0, groups = var_4099_groups_0, pad = var_4099_pad_0, pad_type = var_4099_pad_type_0, strides = var_4099_strides_0, weight = var_4082_to_fp16, x = var_4078_cast_fp16_0)[name = string("op_4099_cast_fp16")]; tensor x_249_cast_fp16 = mul(x = var_4094_cast_fp16, y = var_4099_cast_fp16)[name = string("x_249_cast_fp16")]; string hidden_states_83_pad_type_0 = const()[name = string("hidden_states_83_pad_type_0"), val = string("valid")]; tensor hidden_states_83_strides_0 = const()[name = string("hidden_states_83_strides_0"), val = tensor([1, 1])]; tensor hidden_states_83_pad_0 = const()[name = string("hidden_states_83_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_83_dilations_0 = const()[name = string("hidden_states_83_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_83_groups_0 = const()[name = string("hidden_states_83_groups_0"), val = int32(1)]; tensor var_4081_to_fp16 = const()[name = string("op_4081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418517120)))]; tensor hidden_states_83_cast_fp16 = conv(dilations = hidden_states_83_dilations_0, groups = hidden_states_83_groups_0, pad = hidden_states_83_pad_0, pad_type = hidden_states_83_pad_type_0, strides = hidden_states_83_strides_0, weight = var_4081_to_fp16, x = x_249_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_4112 = const()[name = string("op_4112"), val = int32(1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4115_cast_fp16 = mul(x = x_251_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4115_cast_fp16")]; bool x_253_interleave_0 = const()[name = string("x_253_interleave_0"), val = bool(false)]; tensor x_253_cast_fp16 = concat(axis = var_4112, interleave = x_253_interleave_0, values = (x_251_cast_fp16, var_4115_cast_fp16))[name = string("x_253_cast_fp16")]; tensor out_169_axes_0 = const()[name = string("out_169_axes_0"), val = tensor([1])]; fp16 var_4125_to_fp16 = const()[name = string("op_4125_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_169_cast_fp16 = layer_norm(axes = out_169_axes_0, epsilon = var_4125_to_fp16, x = x_253_cast_fp16)[name = string("out_169_cast_fp16")]; tensor layer_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426905792)))]; tensor out_171_cast_fp16 = mul(x = out_169_cast_fp16, y = layer_layers_14_input_layernorm_weight_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_4131_split_sizes_0 = const()[name = string("op_4131_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4131_axis_0 = const()[name = string("op_4131_axis_0"), val = int32(1)]; tensor var_4131_cast_fp16_0, tensor var_4131_cast_fp16_1 = split(axis = var_4131_axis_0, split_sizes = var_4131_split_sizes_0, x = out_171_cast_fp16)[name = string("op_4131_cast_fp16")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426909952)))]; tensor query_states_57_cast_fp16 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = var_4153_to_fp16, x = var_4131_cast_fp16_0)[name = string("query_states_57_cast_fp16")]; string key_states_57_pad_type_0 = const()[name = string("key_states_57_pad_type_0"), val = string("valid")]; tensor key_states_57_strides_0 = const()[name = string("key_states_57_strides_0"), val = tensor([1, 1])]; tensor key_states_57_pad_0 = const()[name = string("key_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_57_dilations_0 = const()[name = string("key_states_57_dilations_0"), val = tensor([1, 1])]; int32 key_states_57_groups_0 = const()[name = string("key_states_57_groups_0"), val = int32(1)]; tensor var_4164_to_fp16 = const()[name = string("op_4164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429007168)))]; tensor key_states_57_cast_fp16 = conv(dilations = key_states_57_dilations_0, groups = key_states_57_groups_0, pad = key_states_57_pad_0, pad_type = key_states_57_pad_type_0, strides = key_states_57_strides_0, weight = var_4164_to_fp16, x = var_4131_cast_fp16_0)[name = string("key_states_57_cast_fp16")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor var_4175_to_fp16 = const()[name = string("op_4175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429269376)))]; tensor value_states_57_cast_fp16 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = var_4175_to_fp16, x = var_4131_cast_fp16_0)[name = string("value_states_57_cast_fp16")]; tensor var_4183 = const()[name = string("op_4183"), val = tensor([1, 16, 64, 8])]; tensor embed_57_cast_fp16 = reshape(shape = var_4183, x = query_states_57_cast_fp16)[name = string("embed_57_cast_fp16")]; tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 2, 64, 8])]; tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = key_states_57_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor embed_59_perm_0 = const()[name = string("embed_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4194 = const()[name = string("op_4194"), val = tensor([1, 2, 64, 8])]; tensor var_4195_cast_fp16 = reshape(shape = var_4194, x = value_states_57_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor value_states_59_perm_0 = const()[name = string("value_states_59_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4199_cast_fp16 = mul(x = embed_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor var_4200_split_sizes_0 = const()[name = string("op_4200_split_sizes_0"), val = tensor([32, 32])]; int32 var_4200_axis_0 = const()[name = string("op_4200_axis_0"), val = int32(-2)]; tensor var_4200_cast_fp16_0, tensor var_4200_cast_fp16_1 = split(axis = var_4200_axis_0, split_sizes = var_4200_split_sizes_0, x = embed_57_cast_fp16)[name = string("op_4200_cast_fp16")]; fp16 const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4202_cast_fp16 = mul(x = var_4200_cast_fp16_1, y = const_147_promoted_to_fp16)[name = string("op_4202_cast_fp16")]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-2)]; bool var_4205_interleave_0 = const()[name = string("op_4205_interleave_0"), val = bool(false)]; tensor var_4205_cast_fp16 = concat(axis = var_4204, interleave = var_4205_interleave_0, values = (var_4202_cast_fp16, var_4200_cast_fp16_0))[name = string("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = mul(x = var_4205_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4206_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4206_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor embed_59_cast_fp16 = transpose(perm = embed_59_perm_0, x = var_4188_cast_fp16)[name = string("transpose_29")]; tensor var_4209_cast_fp16 = mul(x = embed_59_cast_fp16, y = cos_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor var_4210_split_sizes_0 = const()[name = string("op_4210_split_sizes_0"), val = tensor([32, 32])]; int32 var_4210_axis_0 = const()[name = string("op_4210_axis_0"), val = int32(-1)]; tensor var_4210_cast_fp16_0, tensor var_4210_cast_fp16_1 = split(axis = var_4210_axis_0, split_sizes = var_4210_split_sizes_0, x = embed_59_cast_fp16)[name = string("op_4210_cast_fp16")]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4212_cast_fp16 = mul(x = var_4210_cast_fp16_1, y = const_148_promoted_to_fp16)[name = string("op_4212_cast_fp16")]; int32 var_4214 = const()[name = string("op_4214"), val = int32(-1)]; bool var_4215_interleave_0 = const()[name = string("op_4215_interleave_0"), val = bool(false)]; tensor var_4215_cast_fp16 = concat(axis = var_4214, interleave = var_4215_interleave_0, values = (var_4212_cast_fp16, var_4210_cast_fp16_0))[name = string("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = mul(x = var_4215_cast_fp16, y = sin_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor key_states_59_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4216_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([14])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([0])]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([15])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_141, expand_dims_142, position_id, concat_115_values3_0))[name = string("concat_115")]; tensor concat_116_values1_0 = const()[name = string("concat_116_values1_0"), val = tensor([0])]; tensor concat_116_values3_0 = const()[name = string("concat_116_values3_0"), val = tensor([0])]; int32 concat_116_axis_0 = const()[name = string("concat_116_axis_0"), val = int32(0)]; bool concat_116_interleave_0 = const()[name = string("concat_116_interleave_0"), val = bool(false)]; tensor concat_116 = concat(axis = concat_116_axis_0, interleave = concat_116_interleave_0, values = (expand_dims_144, concat_116_values1_0, var_426, concat_116_values3_0))[name = string("concat_116")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = key_states_59_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_242_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_242")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59_cast_fp16 = transpose(perm = value_states_59_perm_0, x = var_4195_cast_fp16)[name = string("transpose_28")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_115, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_116, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = value_states_59_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_243_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_243")]; tensor var_4259_begin_0 = const()[name = string("op_4259_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4259_end_0 = const()[name = string("op_4259_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4259_end_mask_0 = const()[name = string("op_4259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4259_cast_fp16 = slice_by_index(begin = var_4259_begin_0, end = var_4259_end_0, end_mask = var_4259_end_mask_0, x = coreml_update_state_76)[name = string("op_4259_cast_fp16")]; tensor tile_28 = const()[name = string("tile_28"), val = tensor([1, 1])]; int32 var_4262_axis_0 = const()[name = string("op_4262_axis_0"), val = int32(1)]; tensor var_4262_cast_fp16_0, tensor var_4262_cast_fp16_1 = split(axis = var_4262_axis_0, split_sizes = tile_28, x = var_4259_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor var_4269_begin_0 = const()[name = string("op_4269_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = string("op_4269_end_0"), val = tensor([15, 2, 2048, 64])]; tensor var_4269_end_mask_0 = const()[name = string("op_4269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = coreml_update_state_77)[name = string("op_4269_cast_fp16")]; tensor tile_29 = const()[name = string("tile_29"), val = tensor([1, 1])]; int32 var_4272_axis_0 = const()[name = string("op_4272_axis_0"), val = int32(1)]; tensor var_4272_cast_fp16_0, tensor var_4272_cast_fp16_1 = split(axis = var_4272_axis_0, split_sizes = tile_29, x = var_4269_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4275_split_sizes_0 = const()[name = string("op_4275_split_sizes_0"), val = tensor([8, 8])]; int32 var_4275_axis_0 = const()[name = string("op_4275_axis_0"), val = int32(1)]; tensor var_4275_cast_fp16_0, tensor var_4275_cast_fp16_1 = split(axis = var_4275_axis_0, split_sizes = var_4275_split_sizes_0, x = query_states_59_cast_fp16)[name = string("op_4275_cast_fp16")]; bool attn_weights_225_transpose_x_0 = const()[name = string("attn_weights_225_transpose_x_0"), val = bool(false)]; bool attn_weights_225_transpose_y_0 = const()[name = string("attn_weights_225_transpose_y_0"), val = bool(false)]; tensor attn_weights_225_cast_fp16 = matmul(transpose_x = attn_weights_225_transpose_x_0, transpose_y = attn_weights_225_transpose_y_0, x = var_4262_cast_fp16_0, y = var_4275_cast_fp16_0)[name = string("attn_weights_225_cast_fp16")]; fp16 _inversed_attn_weights_227_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_227_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_227_cast_fp16 = mul(x = attn_weights_225_cast_fp16, y = _inversed_attn_weights_227_y_0_to_fp16)[name = string("_inversed_attn_weights_227_cast_fp16")]; tensor attn_weights_229_cast_fp16 = add(x = _inversed_attn_weights_227_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_229_cast_fp16")]; int32 var_4282 = const()[name = string("op_4282"), val = int32(2)]; tensor attn_weights_231_cast_fp16 = softmax(axis = var_4282, x = attn_weights_229_cast_fp16)[name = string("attn_weights_231_cast_fp16")]; bool var_4288_transpose_x_1 = const()[name = string("op_4288_transpose_x_1"), val = bool(true)]; bool var_4288_transpose_y_1 = const()[name = string("op_4288_transpose_y_1"), val = bool(false)]; tensor var_4288_cast_fp16 = matmul(transpose_x = var_4288_transpose_x_1, transpose_y = var_4288_transpose_y_1, x = attn_weights_231_cast_fp16, y = var_4272_cast_fp16_0)[name = string("op_4288_cast_fp16")]; bool attn_weights_233_transpose_x_0 = const()[name = string("attn_weights_233_transpose_x_0"), val = bool(false)]; bool attn_weights_233_transpose_y_0 = const()[name = string("attn_weights_233_transpose_y_0"), val = bool(false)]; tensor attn_weights_233_cast_fp16 = matmul(transpose_x = attn_weights_233_transpose_x_0, transpose_y = attn_weights_233_transpose_y_0, x = var_4262_cast_fp16_1, y = var_4275_cast_fp16_1)[name = string("attn_weights_233_cast_fp16")]; fp16 _inversed_attn_weights_235_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_235_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_235_cast_fp16 = mul(x = attn_weights_233_cast_fp16, y = _inversed_attn_weights_235_y_0_to_fp16)[name = string("_inversed_attn_weights_235_cast_fp16")]; tensor attn_weights_237_cast_fp16 = add(x = _inversed_attn_weights_235_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_237_cast_fp16")]; int32 var_4294 = const()[name = string("op_4294"), val = int32(2)]; tensor attn_weights_239_cast_fp16 = softmax(axis = var_4294, x = attn_weights_237_cast_fp16)[name = string("attn_weights_239_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(true)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_239_cast_fp16, y = var_4272_cast_fp16_1)[name = string("attn_output_85_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(1)]; bool attn_output_87_interleave_0 = const()[name = string("attn_output_87_interleave_0"), val = bool(false)]; tensor attn_output_87_cast_fp16 = concat(axis = var_4302, interleave = attn_output_87_interleave_0, values = (var_4288_cast_fp16, attn_output_85_cast_fp16))[name = string("attn_output_87_cast_fp16")]; tensor var_4306_perm_0 = const()[name = string("op_4306_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4311 = const()[name = string("op_4311"), val = tensor([1, 1024, 1, 8])]; tensor var_4306_cast_fp16 = transpose(perm = var_4306_perm_0, x = attn_output_87_cast_fp16)[name = string("transpose_27")]; tensor x_257_cast_fp16 = reshape(shape = var_4311, x = var_4306_cast_fp16)[name = string("x_257_cast_fp16")]; string hidden_states_87_pad_type_0 = const()[name = string("hidden_states_87_pad_type_0"), val = string("valid")]; tensor hidden_states_87_strides_0 = const()[name = string("hidden_states_87_strides_0"), val = tensor([1, 1])]; tensor hidden_states_87_pad_0 = const()[name = string("hidden_states_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_87_dilations_0 = const()[name = string("hidden_states_87_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_87_groups_0 = const()[name = string("hidden_states_87_groups_0"), val = int32(1)]; tensor var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429531584)))]; tensor hidden_states_87_cast_fp16 = conv(dilations = hidden_states_87_dilations_0, groups = hidden_states_87_groups_0, pad = hidden_states_87_pad_0, pad_type = hidden_states_87_pad_type_0, strides = hidden_states_87_strides_0, weight = var_4318_to_fp16, x = x_257_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_251_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(1)]; fp16 const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4333_cast_fp16 = mul(x = x_259_cast_fp16, y = const_153_promoted_to_fp16)[name = string("op_4333_cast_fp16")]; bool x_261_interleave_0 = const()[name = string("x_261_interleave_0"), val = bool(false)]; tensor x_261_cast_fp16 = concat(axis = var_4330, interleave = x_261_interleave_0, values = (x_259_cast_fp16, var_4333_cast_fp16))[name = string("x_261_cast_fp16")]; tensor out_175_axes_0 = const()[name = string("out_175_axes_0"), val = tensor([1])]; fp16 var_4343_to_fp16 = const()[name = string("op_4343_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_175_cast_fp16 = layer_norm(axes = out_175_axes_0, epsilon = var_4343_to_fp16, x = x_261_cast_fp16)[name = string("out_175_cast_fp16")]; tensor layer_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431628800)))]; tensor out_177_cast_fp16 = mul(x = out_175_cast_fp16, y = layer_layers_14_post_attention_layernorm_weight_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_4349_split_sizes_0 = const()[name = string("op_4349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4349_axis_0 = const()[name = string("op_4349_axis_0"), val = int32(1)]; tensor var_4349_cast_fp16_0, tensor var_4349_cast_fp16_1 = split(axis = var_4349_axis_0, split_sizes = var_4349_split_sizes_0, x = out_177_cast_fp16)[name = string("op_4349_cast_fp16")]; string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431632960)))]; tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = var_4354_to_fp16, x = var_4349_cast_fp16_0)[name = string("input_29_cast_fp16")]; tensor var_4365_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_4365_cast_fp16")]; string var_4370_pad_type_0 = const()[name = string("op_4370_pad_type_0"), val = string("valid")]; tensor var_4370_strides_0 = const()[name = string("op_4370_strides_0"), val = tensor([1, 1])]; tensor var_4370_pad_0 = const()[name = string("op_4370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4370_dilations_0 = const()[name = string("op_4370_dilations_0"), val = tensor([1, 1])]; int32 var_4370_groups_0 = const()[name = string("op_4370_groups_0"), val = int32(1)]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440021632)))]; tensor var_4370_cast_fp16 = conv(dilations = var_4370_dilations_0, groups = var_4370_groups_0, pad = var_4370_pad_0, pad_type = var_4370_pad_type_0, strides = var_4370_strides_0, weight = var_4353_to_fp16, x = var_4349_cast_fp16_0)[name = string("op_4370_cast_fp16")]; tensor x_267_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4370_cast_fp16)[name = string("x_267_cast_fp16")]; string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; tensor var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448410304)))]; tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = var_4352_to_fp16, x = x_267_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor x_269_cast_fp16 = add(x = x_259_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("x_269_cast_fp16")]; int32 var_4383 = const()[name = string("op_4383"), val = int32(1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool x_271_interleave_0 = const()[name = string("x_271_interleave_0"), val = bool(false)]; tensor x_271_cast_fp16 = concat(axis = var_4383, interleave = x_271_interleave_0, values = (x_269_cast_fp16, var_4386_cast_fp16))[name = string("x_271_cast_fp16")]; tensor out_181_axes_0 = const()[name = string("out_181_axes_0"), val = tensor([1])]; fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_181_cast_fp16 = layer_norm(axes = out_181_axes_0, epsilon = var_4396_to_fp16, x = x_271_cast_fp16)[name = string("out_181_cast_fp16")]; tensor layer_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456798976)))]; tensor out_183_cast_fp16 = mul(x = out_181_cast_fp16, y = layer_layers_15_input_layernorm_weight_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_4402_split_sizes_0 = const()[name = string("op_4402_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4402_axis_0 = const()[name = string("op_4402_axis_0"), val = int32(1)]; tensor var_4402_cast_fp16_0, tensor var_4402_cast_fp16_1 = split(axis = var_4402_axis_0, split_sizes = var_4402_split_sizes_0, x = out_183_cast_fp16)[name = string("op_4402_cast_fp16")]; string query_states_61_pad_type_0 = const()[name = string("query_states_61_pad_type_0"), val = string("valid")]; tensor query_states_61_strides_0 = const()[name = string("query_states_61_strides_0"), val = tensor([1, 1])]; tensor query_states_61_pad_0 = const()[name = string("query_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_61_dilations_0 = const()[name = string("query_states_61_dilations_0"), val = tensor([1, 1])]; int32 query_states_61_groups_0 = const()[name = string("query_states_61_groups_0"), val = int32(1)]; tensor var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456803136)))]; tensor query_states_61_cast_fp16 = conv(dilations = query_states_61_dilations_0, groups = query_states_61_groups_0, pad = query_states_61_pad_0, pad_type = query_states_61_pad_type_0, strides = query_states_61_strides_0, weight = var_4424_to_fp16, x = var_4402_cast_fp16_0)[name = string("query_states_61_cast_fp16")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor var_4435_to_fp16 = const()[name = string("op_4435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458900352)))]; tensor key_states_61_cast_fp16 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = var_4435_to_fp16, x = var_4402_cast_fp16_0)[name = string("key_states_61_cast_fp16")]; string value_states_61_pad_type_0 = const()[name = string("value_states_61_pad_type_0"), val = string("valid")]; tensor value_states_61_strides_0 = const()[name = string("value_states_61_strides_0"), val = tensor([1, 1])]; tensor value_states_61_pad_0 = const()[name = string("value_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_61_dilations_0 = const()[name = string("value_states_61_dilations_0"), val = tensor([1, 1])]; int32 value_states_61_groups_0 = const()[name = string("value_states_61_groups_0"), val = int32(1)]; tensor var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459162560)))]; tensor value_states_61_cast_fp16 = conv(dilations = value_states_61_dilations_0, groups = value_states_61_groups_0, pad = value_states_61_pad_0, pad_type = value_states_61_pad_type_0, strides = value_states_61_strides_0, weight = var_4446_to_fp16, x = var_4402_cast_fp16_0)[name = string("value_states_61_cast_fp16")]; tensor var_4454 = const()[name = string("op_4454"), val = tensor([1, 16, 64, 8])]; tensor embed_61_cast_fp16 = reshape(shape = var_4454, x = query_states_61_cast_fp16)[name = string("embed_61_cast_fp16")]; tensor var_4458 = const()[name = string("op_4458"), val = tensor([1, 2, 64, 8])]; tensor var_4459_cast_fp16 = reshape(shape = var_4458, x = key_states_61_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor embed_63_perm_0 = const()[name = string("embed_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4465 = const()[name = string("op_4465"), val = tensor([1, 2, 64, 8])]; tensor var_4466_cast_fp16 = reshape(shape = var_4465, x = value_states_61_cast_fp16)[name = string("op_4466_cast_fp16")]; tensor value_states_63_perm_0 = const()[name = string("value_states_63_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4470_cast_fp16 = mul(x = embed_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4471_split_sizes_0 = const()[name = string("op_4471_split_sizes_0"), val = tensor([32, 32])]; int32 var_4471_axis_0 = const()[name = string("op_4471_axis_0"), val = int32(-2)]; tensor var_4471_cast_fp16_0, tensor var_4471_cast_fp16_1 = split(axis = var_4471_axis_0, split_sizes = var_4471_split_sizes_0, x = embed_61_cast_fp16)[name = string("op_4471_cast_fp16")]; fp16 const_157_promoted_to_fp16 = const()[name = string("const_157_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4473_cast_fp16 = mul(x = var_4471_cast_fp16_1, y = const_157_promoted_to_fp16)[name = string("op_4473_cast_fp16")]; int32 var_4475 = const()[name = string("op_4475"), val = int32(-2)]; bool var_4476_interleave_0 = const()[name = string("op_4476_interleave_0"), val = bool(false)]; tensor var_4476_cast_fp16 = concat(axis = var_4475, interleave = var_4476_interleave_0, values = (var_4473_cast_fp16, var_4471_cast_fp16_0))[name = string("op_4476_cast_fp16")]; tensor var_4477_cast_fp16 = mul(x = var_4476_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4477_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_4470_cast_fp16, y = var_4477_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor embed_63_cast_fp16 = transpose(perm = embed_63_perm_0, x = var_4459_cast_fp16)[name = string("transpose_26")]; tensor var_4480_cast_fp16 = mul(x = embed_63_cast_fp16, y = cos_cast_fp16)[name = string("op_4480_cast_fp16")]; tensor var_4481_split_sizes_0 = const()[name = string("op_4481_split_sizes_0"), val = tensor([32, 32])]; int32 var_4481_axis_0 = const()[name = string("op_4481_axis_0"), val = int32(-1)]; tensor var_4481_cast_fp16_0, tensor var_4481_cast_fp16_1 = split(axis = var_4481_axis_0, split_sizes = var_4481_split_sizes_0, x = embed_63_cast_fp16)[name = string("op_4481_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4483_cast_fp16 = mul(x = var_4481_cast_fp16_1, y = const_158_promoted_to_fp16)[name = string("op_4483_cast_fp16")]; int32 var_4485 = const()[name = string("op_4485"), val = int32(-1)]; bool var_4486_interleave_0 = const()[name = string("op_4486_interleave_0"), val = bool(false)]; tensor var_4486_cast_fp16 = concat(axis = var_4485, interleave = var_4486_interleave_0, values = (var_4483_cast_fp16, var_4481_cast_fp16_0))[name = string("op_4486_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = var_4486_cast_fp16, y = sin_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor key_states_63_cast_fp16 = add(x = var_4480_cast_fp16, y = var_4487_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([15])]; tensor expand_dims_152 = const()[name = string("expand_dims_152"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([16])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_151, expand_dims_152, position_id, concat_123_values3_0))[name = string("concat_123")]; tensor concat_124_values1_0 = const()[name = string("concat_124_values1_0"), val = tensor([0])]; tensor concat_124_values3_0 = const()[name = string("concat_124_values3_0"), val = tensor([0])]; int32 concat_124_axis_0 = const()[name = string("concat_124_axis_0"), val = int32(0)]; bool concat_124_interleave_0 = const()[name = string("concat_124_interleave_0"), val = bool(false)]; tensor concat_124 = concat(axis = concat_124_axis_0, interleave = concat_124_interleave_0, values = (expand_dims_154, concat_124_values1_0, var_426, concat_124_values3_0))[name = string("concat_124")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_244_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_244")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_63_cast_fp16 = transpose(perm = value_states_63_perm_0, x = var_4466_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_123, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_124, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = value_states_63_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_245_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_245")]; tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_78)[name = string("op_4530_cast_fp16")]; tensor tile_30 = const()[name = string("tile_30"), val = tensor([1, 1])]; int32 var_4533_axis_0 = const()[name = string("op_4533_axis_0"), val = int32(1)]; tensor var_4533_cast_fp16_0, tensor var_4533_cast_fp16_1 = split(axis = var_4533_axis_0, split_sizes = tile_30, x = var_4530_cast_fp16)[name = string("op_4533_cast_fp16")]; tensor var_4540_begin_0 = const()[name = string("op_4540_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_4540_end_0 = const()[name = string("op_4540_end_0"), val = tensor([16, 2, 2048, 64])]; tensor var_4540_end_mask_0 = const()[name = string("op_4540_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4540_cast_fp16 = slice_by_index(begin = var_4540_begin_0, end = var_4540_end_0, end_mask = var_4540_end_mask_0, x = coreml_update_state_79)[name = string("op_4540_cast_fp16")]; tensor tile_31 = const()[name = string("tile_31"), val = tensor([1, 1])]; int32 var_4543_axis_0 = const()[name = string("op_4543_axis_0"), val = int32(1)]; tensor var_4543_cast_fp16_0, tensor var_4543_cast_fp16_1 = split(axis = var_4543_axis_0, split_sizes = tile_31, x = var_4540_cast_fp16)[name = string("op_4543_cast_fp16")]; tensor var_4546_split_sizes_0 = const()[name = string("op_4546_split_sizes_0"), val = tensor([8, 8])]; int32 var_4546_axis_0 = const()[name = string("op_4546_axis_0"), val = int32(1)]; tensor var_4546_cast_fp16_0, tensor var_4546_cast_fp16_1 = split(axis = var_4546_axis_0, split_sizes = var_4546_split_sizes_0, x = query_states_63_cast_fp16)[name = string("op_4546_cast_fp16")]; bool attn_weights_241_transpose_x_0 = const()[name = string("attn_weights_241_transpose_x_0"), val = bool(false)]; bool attn_weights_241_transpose_y_0 = const()[name = string("attn_weights_241_transpose_y_0"), val = bool(false)]; tensor attn_weights_241_cast_fp16 = matmul(transpose_x = attn_weights_241_transpose_x_0, transpose_y = attn_weights_241_transpose_y_0, x = var_4533_cast_fp16_0, y = var_4546_cast_fp16_0)[name = string("attn_weights_241_cast_fp16")]; fp16 _inversed_attn_weights_243_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_243_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_243_cast_fp16 = mul(x = attn_weights_241_cast_fp16, y = _inversed_attn_weights_243_y_0_to_fp16)[name = string("_inversed_attn_weights_243_cast_fp16")]; tensor attn_weights_245_cast_fp16 = add(x = _inversed_attn_weights_243_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_245_cast_fp16")]; int32 var_4553 = const()[name = string("op_4553"), val = int32(2)]; tensor attn_weights_247_cast_fp16 = softmax(axis = var_4553, x = attn_weights_245_cast_fp16)[name = string("attn_weights_247_cast_fp16")]; bool var_4559_transpose_x_1 = const()[name = string("op_4559_transpose_x_1"), val = bool(true)]; bool var_4559_transpose_y_1 = const()[name = string("op_4559_transpose_y_1"), val = bool(false)]; tensor var_4559_cast_fp16 = matmul(transpose_x = var_4559_transpose_x_1, transpose_y = var_4559_transpose_y_1, x = attn_weights_247_cast_fp16, y = var_4543_cast_fp16_0)[name = string("op_4559_cast_fp16")]; bool attn_weights_249_transpose_x_0 = const()[name = string("attn_weights_249_transpose_x_0"), val = bool(false)]; bool attn_weights_249_transpose_y_0 = const()[name = string("attn_weights_249_transpose_y_0"), val = bool(false)]; tensor attn_weights_249_cast_fp16 = matmul(transpose_x = attn_weights_249_transpose_x_0, transpose_y = attn_weights_249_transpose_y_0, x = var_4533_cast_fp16_1, y = var_4546_cast_fp16_1)[name = string("attn_weights_249_cast_fp16")]; fp16 _inversed_attn_weights_251_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_251_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_251_cast_fp16 = mul(x = attn_weights_249_cast_fp16, y = _inversed_attn_weights_251_y_0_to_fp16)[name = string("_inversed_attn_weights_251_cast_fp16")]; tensor attn_weights_253_cast_fp16 = add(x = _inversed_attn_weights_251_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_253_cast_fp16")]; int32 var_4565 = const()[name = string("op_4565"), val = int32(2)]; tensor attn_weights_255_cast_fp16 = softmax(axis = var_4565, x = attn_weights_253_cast_fp16)[name = string("attn_weights_255_cast_fp16")]; bool attn_output_91_transpose_x_1 = const()[name = string("attn_output_91_transpose_x_1"), val = bool(true)]; bool attn_output_91_transpose_y_1 = const()[name = string("attn_output_91_transpose_y_1"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_1, transpose_y = attn_output_91_transpose_y_1, x = attn_weights_255_cast_fp16, y = var_4543_cast_fp16_1)[name = string("attn_output_91_cast_fp16")]; int32 var_4573 = const()[name = string("op_4573"), val = int32(1)]; bool attn_output_93_interleave_0 = const()[name = string("attn_output_93_interleave_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = concat(axis = var_4573, interleave = attn_output_93_interleave_0, values = (var_4559_cast_fp16, attn_output_91_cast_fp16))[name = string("attn_output_93_cast_fp16")]; tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 1024, 1, 8])]; tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; tensor x_275_cast_fp16 = reshape(shape = var_4582, x = var_4577_cast_fp16)[name = string("x_275_cast_fp16")]; string hidden_states_93_pad_type_0 = const()[name = string("hidden_states_93_pad_type_0"), val = string("valid")]; tensor hidden_states_93_strides_0 = const()[name = string("hidden_states_93_strides_0"), val = tensor([1, 1])]; tensor hidden_states_93_pad_0 = const()[name = string("hidden_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_93_dilations_0 = const()[name = string("hidden_states_93_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_93_groups_0 = const()[name = string("hidden_states_93_groups_0"), val = int32(1)]; tensor var_4589_to_fp16 = const()[name = string("op_4589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459424768)))]; tensor hidden_states_93_cast_fp16 = conv(dilations = hidden_states_93_dilations_0, groups = hidden_states_93_groups_0, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = hidden_states_93_strides_0, weight = var_4589_to_fp16, x = x_275_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_269_cast_fp16, y = hidden_states_93_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_4601 = const()[name = string("op_4601"), val = int32(1)]; fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4604_cast_fp16 = mul(x = x_277_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_4604_cast_fp16")]; bool x_279_interleave_0 = const()[name = string("x_279_interleave_0"), val = bool(false)]; tensor x_279_cast_fp16 = concat(axis = var_4601, interleave = x_279_interleave_0, values = (x_277_cast_fp16, var_4604_cast_fp16))[name = string("x_279_cast_fp16")]; tensor out_187_axes_0 = const()[name = string("out_187_axes_0"), val = tensor([1])]; fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_187_cast_fp16 = layer_norm(axes = out_187_axes_0, epsilon = var_4614_to_fp16, x = x_279_cast_fp16)[name = string("out_187_cast_fp16")]; tensor layer_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461521984)))]; tensor out_189_cast_fp16 = mul(x = out_187_cast_fp16, y = layer_layers_15_post_attention_layernorm_weight_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_4620_split_sizes_0 = const()[name = string("op_4620_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4620_axis_0 = const()[name = string("op_4620_axis_0"), val = int32(1)]; tensor var_4620_cast_fp16_0, tensor var_4620_cast_fp16_1 = split(axis = var_4620_axis_0, split_sizes = var_4620_split_sizes_0, x = out_189_cast_fp16)[name = string("op_4620_cast_fp16")]; string input_31_pad_type_0 = const()[name = string("input_31_pad_type_0"), val = string("valid")]; tensor input_31_strides_0 = const()[name = string("input_31_strides_0"), val = tensor([1, 1])]; tensor input_31_pad_0 = const()[name = string("input_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_31_dilations_0 = const()[name = string("input_31_dilations_0"), val = tensor([1, 1])]; int32 input_31_groups_0 = const()[name = string("input_31_groups_0"), val = int32(1)]; tensor var_4625_to_fp16 = const()[name = string("op_4625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461526144)))]; tensor input_31_cast_fp16 = conv(dilations = input_31_dilations_0, groups = input_31_groups_0, pad = input_31_pad_0, pad_type = input_31_pad_type_0, strides = input_31_strides_0, weight = var_4625_to_fp16, x = var_4620_cast_fp16_0)[name = string("input_31_cast_fp16")]; tensor var_4636_cast_fp16 = silu(x = input_31_cast_fp16)[name = string("op_4636_cast_fp16")]; string var_4641_pad_type_0 = const()[name = string("op_4641_pad_type_0"), val = string("valid")]; tensor var_4641_strides_0 = const()[name = string("op_4641_strides_0"), val = tensor([1, 1])]; tensor var_4641_pad_0 = const()[name = string("op_4641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_dilations_0 = const()[name = string("op_4641_dilations_0"), val = tensor([1, 1])]; int32 var_4641_groups_0 = const()[name = string("op_4641_groups_0"), val = int32(1)]; tensor var_4624_to_fp16 = const()[name = string("op_4624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469914816)))]; tensor var_4641_cast_fp16 = conv(dilations = var_4641_dilations_0, groups = var_4641_groups_0, pad = var_4641_pad_0, pad_type = var_4641_pad_type_0, strides = var_4641_strides_0, weight = var_4624_to_fp16, x = var_4620_cast_fp16_0)[name = string("op_4641_cast_fp16")]; tensor x_285_cast_fp16 = mul(x = var_4636_cast_fp16, y = var_4641_cast_fp16)[name = string("x_285_cast_fp16")]; string hidden_states_95_pad_type_0 = const()[name = string("hidden_states_95_pad_type_0"), val = string("valid")]; tensor hidden_states_95_strides_0 = const()[name = string("hidden_states_95_strides_0"), val = tensor([1, 1])]; tensor hidden_states_95_pad_0 = const()[name = string("hidden_states_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_95_dilations_0 = const()[name = string("hidden_states_95_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_95_groups_0 = const()[name = string("hidden_states_95_groups_0"), val = int32(1)]; tensor var_4623_to_fp16 = const()[name = string("op_4623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478303488)))]; tensor hidden_states_95_cast_fp16 = conv(dilations = hidden_states_95_dilations_0, groups = hidden_states_95_groups_0, pad = hidden_states_95_pad_0, pad_type = hidden_states_95_pad_type_0, strides = hidden_states_95_strides_0, weight = var_4623_to_fp16, x = x_285_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor x_287_cast_fp16 = add(x = x_277_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("x_287_cast_fp16")]; int32 var_4654 = const()[name = string("op_4654"), val = int32(1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4657_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4657_cast_fp16")]; bool x_289_interleave_0 = const()[name = string("x_289_interleave_0"), val = bool(false)]; tensor x_289_cast_fp16 = concat(axis = var_4654, interleave = x_289_interleave_0, values = (x_287_cast_fp16, var_4657_cast_fp16))[name = string("x_289_cast_fp16")]; tensor out_193_axes_0 = const()[name = string("out_193_axes_0"), val = tensor([1])]; fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_193_cast_fp16 = layer_norm(axes = out_193_axes_0, epsilon = var_4667_to_fp16, x = x_289_cast_fp16)[name = string("out_193_cast_fp16")]; tensor layer_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486692160)))]; tensor out_195_cast_fp16 = mul(x = out_193_cast_fp16, y = layer_layers_16_input_layernorm_weight_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_4673_split_sizes_0 = const()[name = string("op_4673_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4673_axis_0 = const()[name = string("op_4673_axis_0"), val = int32(1)]; tensor var_4673_cast_fp16_0, tensor var_4673_cast_fp16_1 = split(axis = var_4673_axis_0, split_sizes = var_4673_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4673_cast_fp16")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486696320)))]; tensor query_states_65_cast_fp16 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = var_4695_to_fp16, x = var_4673_cast_fp16_0)[name = string("query_states_65_cast_fp16")]; string key_states_65_pad_type_0 = const()[name = string("key_states_65_pad_type_0"), val = string("valid")]; tensor key_states_65_strides_0 = const()[name = string("key_states_65_strides_0"), val = tensor([1, 1])]; tensor key_states_65_pad_0 = const()[name = string("key_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_65_dilations_0 = const()[name = string("key_states_65_dilations_0"), val = tensor([1, 1])]; int32 key_states_65_groups_0 = const()[name = string("key_states_65_groups_0"), val = int32(1)]; tensor var_4706_to_fp16 = const()[name = string("op_4706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488793536)))]; tensor key_states_65_cast_fp16 = conv(dilations = key_states_65_dilations_0, groups = key_states_65_groups_0, pad = key_states_65_pad_0, pad_type = key_states_65_pad_type_0, strides = key_states_65_strides_0, weight = var_4706_to_fp16, x = var_4673_cast_fp16_0)[name = string("key_states_65_cast_fp16")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489055744)))]; tensor value_states_65_cast_fp16 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = var_4717_to_fp16, x = var_4673_cast_fp16_0)[name = string("value_states_65_cast_fp16")]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 16, 64, 8])]; tensor embed_65_cast_fp16 = reshape(shape = var_4725, x = query_states_65_cast_fp16)[name = string("embed_65_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 2, 64, 8])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = key_states_65_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor embed_67_perm_0 = const()[name = string("embed_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2, 64, 8])]; tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = value_states_65_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor value_states_67_perm_0 = const()[name = string("value_states_67_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4741_cast_fp16 = mul(x = embed_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4741_cast_fp16")]; tensor var_4742_split_sizes_0 = const()[name = string("op_4742_split_sizes_0"), val = tensor([32, 32])]; int32 var_4742_axis_0 = const()[name = string("op_4742_axis_0"), val = int32(-2)]; tensor var_4742_cast_fp16_0, tensor var_4742_cast_fp16_1 = split(axis = var_4742_axis_0, split_sizes = var_4742_split_sizes_0, x = embed_65_cast_fp16)[name = string("op_4742_cast_fp16")]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4744_cast_fp16 = mul(x = var_4742_cast_fp16_1, y = const_167_promoted_to_fp16)[name = string("op_4744_cast_fp16")]; int32 var_4746 = const()[name = string("op_4746"), val = int32(-2)]; bool var_4747_interleave_0 = const()[name = string("op_4747_interleave_0"), val = bool(false)]; tensor var_4747_cast_fp16 = concat(axis = var_4746, interleave = var_4747_interleave_0, values = (var_4744_cast_fp16, var_4742_cast_fp16_0))[name = string("op_4747_cast_fp16")]; tensor var_4748_cast_fp16 = mul(x = var_4747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_4741_cast_fp16, y = var_4748_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor embed_67_cast_fp16 = transpose(perm = embed_67_perm_0, x = var_4730_cast_fp16)[name = string("transpose_23")]; tensor var_4751_cast_fp16 = mul(x = embed_67_cast_fp16, y = cos_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4752_split_sizes_0 = const()[name = string("op_4752_split_sizes_0"), val = tensor([32, 32])]; int32 var_4752_axis_0 = const()[name = string("op_4752_axis_0"), val = int32(-1)]; tensor var_4752_cast_fp16_0, tensor var_4752_cast_fp16_1 = split(axis = var_4752_axis_0, split_sizes = var_4752_split_sizes_0, x = embed_67_cast_fp16)[name = string("op_4752_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4754_cast_fp16 = mul(x = var_4752_cast_fp16_1, y = const_168_promoted_to_fp16)[name = string("op_4754_cast_fp16")]; int32 var_4756 = const()[name = string("op_4756"), val = int32(-1)]; bool var_4757_interleave_0 = const()[name = string("op_4757_interleave_0"), val = bool(false)]; tensor var_4757_cast_fp16 = concat(axis = var_4756, interleave = var_4757_interleave_0, values = (var_4754_cast_fp16, var_4752_cast_fp16_0))[name = string("op_4757_cast_fp16")]; tensor var_4758_cast_fp16 = mul(x = var_4757_cast_fp16, y = sin_cast_fp16)[name = string("op_4758_cast_fp16")]; tensor key_states_67_cast_fp16 = add(x = var_4751_cast_fp16, y = var_4758_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor expand_dims_161 = const()[name = string("expand_dims_161"), val = tensor([16])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_164 = const()[name = string("expand_dims_164"), val = tensor([17])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_161, expand_dims_162, position_id, concat_131_values3_0))[name = string("concat_131")]; tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_164, concat_132_values1_0, var_426, concat_132_values3_0))[name = string("concat_132")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = key_states_67_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_246_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_246")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67_cast_fp16 = transpose(perm = value_states_67_perm_0, x = var_4737_cast_fp16)[name = string("transpose_22")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_131, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_132, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = value_states_67_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_247_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_247")]; tensor var_4801_begin_0 = const()[name = string("op_4801_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4801_end_0 = const()[name = string("op_4801_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4801_end_mask_0 = const()[name = string("op_4801_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4801_cast_fp16 = slice_by_index(begin = var_4801_begin_0, end = var_4801_end_0, end_mask = var_4801_end_mask_0, x = coreml_update_state_80)[name = string("op_4801_cast_fp16")]; tensor tile_32 = const()[name = string("tile_32"), val = tensor([1, 1])]; int32 var_4804_axis_0 = const()[name = string("op_4804_axis_0"), val = int32(1)]; tensor var_4804_cast_fp16_0, tensor var_4804_cast_fp16_1 = split(axis = var_4804_axis_0, split_sizes = tile_32, x = var_4801_cast_fp16)[name = string("op_4804_cast_fp16")]; tensor var_4811_begin_0 = const()[name = string("op_4811_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_4811_end_0 = const()[name = string("op_4811_end_0"), val = tensor([17, 2, 2048, 64])]; tensor var_4811_end_mask_0 = const()[name = string("op_4811_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4811_cast_fp16 = slice_by_index(begin = var_4811_begin_0, end = var_4811_end_0, end_mask = var_4811_end_mask_0, x = coreml_update_state_81)[name = string("op_4811_cast_fp16")]; tensor tile_33 = const()[name = string("tile_33"), val = tensor([1, 1])]; int32 var_4814_axis_0 = const()[name = string("op_4814_axis_0"), val = int32(1)]; tensor var_4814_cast_fp16_0, tensor var_4814_cast_fp16_1 = split(axis = var_4814_axis_0, split_sizes = tile_33, x = var_4811_cast_fp16)[name = string("op_4814_cast_fp16")]; tensor var_4817_split_sizes_0 = const()[name = string("op_4817_split_sizes_0"), val = tensor([8, 8])]; int32 var_4817_axis_0 = const()[name = string("op_4817_axis_0"), val = int32(1)]; tensor var_4817_cast_fp16_0, tensor var_4817_cast_fp16_1 = split(axis = var_4817_axis_0, split_sizes = var_4817_split_sizes_0, x = query_states_67_cast_fp16)[name = string("op_4817_cast_fp16")]; bool attn_weights_257_transpose_x_0 = const()[name = string("attn_weights_257_transpose_x_0"), val = bool(false)]; bool attn_weights_257_transpose_y_0 = const()[name = string("attn_weights_257_transpose_y_0"), val = bool(false)]; tensor attn_weights_257_cast_fp16 = matmul(transpose_x = attn_weights_257_transpose_x_0, transpose_y = attn_weights_257_transpose_y_0, x = var_4804_cast_fp16_0, y = var_4817_cast_fp16_0)[name = string("attn_weights_257_cast_fp16")]; fp16 _inversed_attn_weights_259_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_259_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_259_cast_fp16 = mul(x = attn_weights_257_cast_fp16, y = _inversed_attn_weights_259_y_0_to_fp16)[name = string("_inversed_attn_weights_259_cast_fp16")]; tensor attn_weights_261_cast_fp16 = add(x = _inversed_attn_weights_259_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_261_cast_fp16")]; int32 var_4824 = const()[name = string("op_4824"), val = int32(2)]; tensor attn_weights_263_cast_fp16 = softmax(axis = var_4824, x = attn_weights_261_cast_fp16)[name = string("attn_weights_263_cast_fp16")]; bool var_4830_transpose_x_1 = const()[name = string("op_4830_transpose_x_1"), val = bool(true)]; bool var_4830_transpose_y_1 = const()[name = string("op_4830_transpose_y_1"), val = bool(false)]; tensor var_4830_cast_fp16 = matmul(transpose_x = var_4830_transpose_x_1, transpose_y = var_4830_transpose_y_1, x = attn_weights_263_cast_fp16, y = var_4814_cast_fp16_0)[name = string("op_4830_cast_fp16")]; bool attn_weights_265_transpose_x_0 = const()[name = string("attn_weights_265_transpose_x_0"), val = bool(false)]; bool attn_weights_265_transpose_y_0 = const()[name = string("attn_weights_265_transpose_y_0"), val = bool(false)]; tensor attn_weights_265_cast_fp16 = matmul(transpose_x = attn_weights_265_transpose_x_0, transpose_y = attn_weights_265_transpose_y_0, x = var_4804_cast_fp16_1, y = var_4817_cast_fp16_1)[name = string("attn_weights_265_cast_fp16")]; fp16 _inversed_attn_weights_267_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_267_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_267_cast_fp16 = mul(x = attn_weights_265_cast_fp16, y = _inversed_attn_weights_267_y_0_to_fp16)[name = string("_inversed_attn_weights_267_cast_fp16")]; tensor attn_weights_269_cast_fp16 = add(x = _inversed_attn_weights_267_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_269_cast_fp16")]; int32 var_4836 = const()[name = string("op_4836"), val = int32(2)]; tensor attn_weights_271_cast_fp16 = softmax(axis = var_4836, x = attn_weights_269_cast_fp16)[name = string("attn_weights_271_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(true)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_271_cast_fp16, y = var_4814_cast_fp16_1)[name = string("attn_output_97_cast_fp16")]; int32 var_4844 = const()[name = string("op_4844"), val = int32(1)]; bool attn_output_99_interleave_0 = const()[name = string("attn_output_99_interleave_0"), val = bool(false)]; tensor attn_output_99_cast_fp16 = concat(axis = var_4844, interleave = attn_output_99_interleave_0, values = (var_4830_cast_fp16, attn_output_97_cast_fp16))[name = string("attn_output_99_cast_fp16")]; tensor var_4848_perm_0 = const()[name = string("op_4848_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 1024, 1, 8])]; tensor var_4848_cast_fp16 = transpose(perm = var_4848_perm_0, x = attn_output_99_cast_fp16)[name = string("transpose_21")]; tensor x_293_cast_fp16 = reshape(shape = var_4853, x = var_4848_cast_fp16)[name = string("x_293_cast_fp16")]; string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489317952)))]; tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = var_4860_to_fp16, x = x_293_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor x_295_cast_fp16 = add(x = x_287_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("x_295_cast_fp16")]; int32 var_4872 = const()[name = string("op_4872"), val = int32(1)]; fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4875_cast_fp16 = mul(x = x_295_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; bool x_297_interleave_0 = const()[name = string("x_297_interleave_0"), val = bool(false)]; tensor x_297_cast_fp16 = concat(axis = var_4872, interleave = x_297_interleave_0, values = (x_295_cast_fp16, var_4875_cast_fp16))[name = string("x_297_cast_fp16")]; tensor out_199_axes_0 = const()[name = string("out_199_axes_0"), val = tensor([1])]; fp16 var_4885_to_fp16 = const()[name = string("op_4885_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_199_cast_fp16 = layer_norm(axes = out_199_axes_0, epsilon = var_4885_to_fp16, x = x_297_cast_fp16)[name = string("out_199_cast_fp16")]; tensor layer_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491415168)))]; tensor out_201_cast_fp16 = mul(x = out_199_cast_fp16, y = layer_layers_16_post_attention_layernorm_weight_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4891_split_sizes_0 = const()[name = string("op_4891_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(1)]; tensor var_4891_cast_fp16_0, tensor var_4891_cast_fp16_1 = split(axis = var_4891_axis_0, split_sizes = var_4891_split_sizes_0, x = out_201_cast_fp16)[name = string("op_4891_cast_fp16")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor var_4896_to_fp16 = const()[name = string("op_4896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491419328)))]; tensor input_33_cast_fp16 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = var_4896_to_fp16, x = var_4891_cast_fp16_0)[name = string("input_33_cast_fp16")]; tensor var_4907_cast_fp16 = silu(x = input_33_cast_fp16)[name = string("op_4907_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4895_to_fp16 = const()[name = string("op_4895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499808000)))]; tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = var_4895_to_fp16, x = var_4891_cast_fp16_0)[name = string("op_4912_cast_fp16")]; tensor x_303_cast_fp16 = mul(x = var_4907_cast_fp16, y = var_4912_cast_fp16)[name = string("x_303_cast_fp16")]; string hidden_states_101_pad_type_0 = const()[name = string("hidden_states_101_pad_type_0"), val = string("valid")]; tensor hidden_states_101_strides_0 = const()[name = string("hidden_states_101_strides_0"), val = tensor([1, 1])]; tensor hidden_states_101_pad_0 = const()[name = string("hidden_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_101_dilations_0 = const()[name = string("hidden_states_101_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_101_groups_0 = const()[name = string("hidden_states_101_groups_0"), val = int32(1)]; tensor var_4894_to_fp16 = const()[name = string("op_4894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508196672)))]; tensor hidden_states_101_cast_fp16 = conv(dilations = hidden_states_101_dilations_0, groups = hidden_states_101_groups_0, pad = hidden_states_101_pad_0, pad_type = hidden_states_101_pad_type_0, strides = hidden_states_101_strides_0, weight = var_4894_to_fp16, x = x_303_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_295_cast_fp16, y = hidden_states_101_cast_fp16)[name = string("x_305_cast_fp16")]; int32 var_4925 = const()[name = string("op_4925"), val = int32(1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4928_cast_fp16 = mul(x = x_305_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_4928_cast_fp16")]; bool x_307_interleave_0 = const()[name = string("x_307_interleave_0"), val = bool(false)]; tensor x_307_cast_fp16 = concat(axis = var_4925, interleave = x_307_interleave_0, values = (x_305_cast_fp16, var_4928_cast_fp16))[name = string("x_307_cast_fp16")]; tensor out_205_axes_0 = const()[name = string("out_205_axes_0"), val = tensor([1])]; fp16 var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_205_cast_fp16 = layer_norm(axes = out_205_axes_0, epsilon = var_4938_to_fp16, x = x_307_cast_fp16)[name = string("out_205_cast_fp16")]; tensor layer_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516585344)))]; tensor out_207_cast_fp16 = mul(x = out_205_cast_fp16, y = layer_layers_17_input_layernorm_weight_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_4944_split_sizes_0 = const()[name = string("op_4944_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4944_axis_0 = const()[name = string("op_4944_axis_0"), val = int32(1)]; tensor var_4944_cast_fp16_0, tensor var_4944_cast_fp16_1 = split(axis = var_4944_axis_0, split_sizes = var_4944_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4944_cast_fp16")]; string query_states_69_pad_type_0 = const()[name = string("query_states_69_pad_type_0"), val = string("valid")]; tensor query_states_69_strides_0 = const()[name = string("query_states_69_strides_0"), val = tensor([1, 1])]; tensor query_states_69_pad_0 = const()[name = string("query_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_69_dilations_0 = const()[name = string("query_states_69_dilations_0"), val = tensor([1, 1])]; int32 query_states_69_groups_0 = const()[name = string("query_states_69_groups_0"), val = int32(1)]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516589504)))]; tensor query_states_69_cast_fp16 = conv(dilations = query_states_69_dilations_0, groups = query_states_69_groups_0, pad = query_states_69_pad_0, pad_type = query_states_69_pad_type_0, strides = query_states_69_strides_0, weight = var_4966_to_fp16, x = var_4944_cast_fp16_0)[name = string("query_states_69_cast_fp16")]; string key_states_69_pad_type_0 = const()[name = string("key_states_69_pad_type_0"), val = string("valid")]; tensor key_states_69_strides_0 = const()[name = string("key_states_69_strides_0"), val = tensor([1, 1])]; tensor key_states_69_pad_0 = const()[name = string("key_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_69_dilations_0 = const()[name = string("key_states_69_dilations_0"), val = tensor([1, 1])]; int32 key_states_69_groups_0 = const()[name = string("key_states_69_groups_0"), val = int32(1)]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518686720)))]; tensor key_states_69_cast_fp16 = conv(dilations = key_states_69_dilations_0, groups = key_states_69_groups_0, pad = key_states_69_pad_0, pad_type = key_states_69_pad_type_0, strides = key_states_69_strides_0, weight = var_4977_to_fp16, x = var_4944_cast_fp16_0)[name = string("key_states_69_cast_fp16")]; string value_states_69_pad_type_0 = const()[name = string("value_states_69_pad_type_0"), val = string("valid")]; tensor value_states_69_strides_0 = const()[name = string("value_states_69_strides_0"), val = tensor([1, 1])]; tensor value_states_69_pad_0 = const()[name = string("value_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_69_dilations_0 = const()[name = string("value_states_69_dilations_0"), val = tensor([1, 1])]; int32 value_states_69_groups_0 = const()[name = string("value_states_69_groups_0"), val = int32(1)]; tensor var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518948928)))]; tensor value_states_69_cast_fp16 = conv(dilations = value_states_69_dilations_0, groups = value_states_69_groups_0, pad = value_states_69_pad_0, pad_type = value_states_69_pad_type_0, strides = value_states_69_strides_0, weight = var_4988_to_fp16, x = var_4944_cast_fp16_0)[name = string("value_states_69_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 16, 64, 8])]; tensor embed_69_cast_fp16 = reshape(shape = var_4996, x = query_states_69_cast_fp16)[name = string("embed_69_cast_fp16")]; tensor var_5000 = const()[name = string("op_5000"), val = tensor([1, 2, 64, 8])]; tensor var_5001_cast_fp16 = reshape(shape = var_5000, x = key_states_69_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor embed_71_perm_0 = const()[name = string("embed_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 2, 64, 8])]; tensor var_5008_cast_fp16 = reshape(shape = var_5007, x = value_states_69_cast_fp16)[name = string("op_5008_cast_fp16")]; tensor value_states_71_perm_0 = const()[name = string("value_states_71_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5012_cast_fp16 = mul(x = embed_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor var_5013_split_sizes_0 = const()[name = string("op_5013_split_sizes_0"), val = tensor([32, 32])]; int32 var_5013_axis_0 = const()[name = string("op_5013_axis_0"), val = int32(-2)]; tensor var_5013_cast_fp16_0, tensor var_5013_cast_fp16_1 = split(axis = var_5013_axis_0, split_sizes = var_5013_split_sizes_0, x = embed_69_cast_fp16)[name = string("op_5013_cast_fp16")]; fp16 const_177_promoted_to_fp16 = const()[name = string("const_177_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5015_cast_fp16 = mul(x = var_5013_cast_fp16_1, y = const_177_promoted_to_fp16)[name = string("op_5015_cast_fp16")]; int32 var_5017 = const()[name = string("op_5017"), val = int32(-2)]; bool var_5018_interleave_0 = const()[name = string("op_5018_interleave_0"), val = bool(false)]; tensor var_5018_cast_fp16 = concat(axis = var_5017, interleave = var_5018_interleave_0, values = (var_5015_cast_fp16, var_5013_cast_fp16_0))[name = string("op_5018_cast_fp16")]; tensor var_5019_cast_fp16 = mul(x = var_5018_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_5012_cast_fp16, y = var_5019_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor embed_71_cast_fp16 = transpose(perm = embed_71_perm_0, x = var_5001_cast_fp16)[name = string("transpose_20")]; tensor var_5022_cast_fp16 = mul(x = embed_71_cast_fp16, y = cos_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([32, 32])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_cast_fp16_0, tensor var_5023_cast_fp16_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = embed_71_cast_fp16)[name = string("op_5023_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5025_cast_fp16 = mul(x = var_5023_cast_fp16_1, y = const_178_promoted_to_fp16)[name = string("op_5025_cast_fp16")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028_cast_fp16 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025_cast_fp16, var_5023_cast_fp16_0))[name = string("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = mul(x = var_5028_cast_fp16, y = sin_cast_fp16)[name = string("op_5029_cast_fp16")]; tensor key_states_71_cast_fp16 = add(x = var_5022_cast_fp16, y = var_5029_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([17])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([0])]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([18])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_171, expand_dims_172, position_id, concat_139_values3_0))[name = string("concat_139")]; tensor concat_140_values1_0 = const()[name = string("concat_140_values1_0"), val = tensor([0])]; tensor concat_140_values3_0 = const()[name = string("concat_140_values3_0"), val = tensor([0])]; int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (expand_dims_174, concat_140_values1_0, var_426, concat_140_values3_0))[name = string("concat_140")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = key_states_71_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_248_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_248")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_71_cast_fp16 = transpose(perm = value_states_71_perm_0, x = var_5008_cast_fp16)[name = string("transpose_19")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_140, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = value_states_71_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_249_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_249")]; tensor var_5072_begin_0 = const()[name = string("op_5072_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5072_end_0 = const()[name = string("op_5072_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5072_end_mask_0 = const()[name = string("op_5072_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5072_cast_fp16 = slice_by_index(begin = var_5072_begin_0, end = var_5072_end_0, end_mask = var_5072_end_mask_0, x = coreml_update_state_82)[name = string("op_5072_cast_fp16")]; tensor tile_34 = const()[name = string("tile_34"), val = tensor([1, 1])]; int32 var_5075_axis_0 = const()[name = string("op_5075_axis_0"), val = int32(1)]; tensor var_5075_cast_fp16_0, tensor var_5075_cast_fp16_1 = split(axis = var_5075_axis_0, split_sizes = tile_34, x = var_5072_cast_fp16)[name = string("op_5075_cast_fp16")]; tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([18, 2, 2048, 64])]; tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = coreml_update_state_83)[name = string("op_5082_cast_fp16")]; tensor tile_35 = const()[name = string("tile_35"), val = tensor([1, 1])]; int32 var_5085_axis_0 = const()[name = string("op_5085_axis_0"), val = int32(1)]; tensor var_5085_cast_fp16_0, tensor var_5085_cast_fp16_1 = split(axis = var_5085_axis_0, split_sizes = tile_35, x = var_5082_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor var_5088_split_sizes_0 = const()[name = string("op_5088_split_sizes_0"), val = tensor([8, 8])]; int32 var_5088_axis_0 = const()[name = string("op_5088_axis_0"), val = int32(1)]; tensor var_5088_cast_fp16_0, tensor var_5088_cast_fp16_1 = split(axis = var_5088_axis_0, split_sizes = var_5088_split_sizes_0, x = query_states_71_cast_fp16)[name = string("op_5088_cast_fp16")]; bool attn_weights_273_transpose_x_0 = const()[name = string("attn_weights_273_transpose_x_0"), val = bool(false)]; bool attn_weights_273_transpose_y_0 = const()[name = string("attn_weights_273_transpose_y_0"), val = bool(false)]; tensor attn_weights_273_cast_fp16 = matmul(transpose_x = attn_weights_273_transpose_x_0, transpose_y = attn_weights_273_transpose_y_0, x = var_5075_cast_fp16_0, y = var_5088_cast_fp16_0)[name = string("attn_weights_273_cast_fp16")]; fp16 _inversed_attn_weights_275_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_275_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_275_cast_fp16 = mul(x = attn_weights_273_cast_fp16, y = _inversed_attn_weights_275_y_0_to_fp16)[name = string("_inversed_attn_weights_275_cast_fp16")]; tensor attn_weights_277_cast_fp16 = add(x = _inversed_attn_weights_275_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_277_cast_fp16")]; int32 var_5095 = const()[name = string("op_5095"), val = int32(2)]; tensor attn_weights_279_cast_fp16 = softmax(axis = var_5095, x = attn_weights_277_cast_fp16)[name = string("attn_weights_279_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(true)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(false)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = attn_weights_279_cast_fp16, y = var_5085_cast_fp16_0)[name = string("op_5101_cast_fp16")]; bool attn_weights_281_transpose_x_0 = const()[name = string("attn_weights_281_transpose_x_0"), val = bool(false)]; bool attn_weights_281_transpose_y_0 = const()[name = string("attn_weights_281_transpose_y_0"), val = bool(false)]; tensor attn_weights_281_cast_fp16 = matmul(transpose_x = attn_weights_281_transpose_x_0, transpose_y = attn_weights_281_transpose_y_0, x = var_5075_cast_fp16_1, y = var_5088_cast_fp16_1)[name = string("attn_weights_281_cast_fp16")]; fp16 _inversed_attn_weights_283_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_283_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_283_cast_fp16 = mul(x = attn_weights_281_cast_fp16, y = _inversed_attn_weights_283_y_0_to_fp16)[name = string("_inversed_attn_weights_283_cast_fp16")]; tensor attn_weights_285_cast_fp16 = add(x = _inversed_attn_weights_283_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_285_cast_fp16")]; int32 var_5107 = const()[name = string("op_5107"), val = int32(2)]; tensor attn_weights_287_cast_fp16 = softmax(axis = var_5107, x = attn_weights_285_cast_fp16)[name = string("attn_weights_287_cast_fp16")]; bool attn_output_103_transpose_x_1 = const()[name = string("attn_output_103_transpose_x_1"), val = bool(true)]; bool attn_output_103_transpose_y_1 = const()[name = string("attn_output_103_transpose_y_1"), val = bool(false)]; tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_1, transpose_y = attn_output_103_transpose_y_1, x = attn_weights_287_cast_fp16, y = var_5085_cast_fp16_1)[name = string("attn_output_103_cast_fp16")]; int32 var_5115 = const()[name = string("op_5115"), val = int32(1)]; bool attn_output_105_interleave_0 = const()[name = string("attn_output_105_interleave_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = concat(axis = var_5115, interleave = attn_output_105_interleave_0, values = (var_5101_cast_fp16, attn_output_103_cast_fp16))[name = string("attn_output_105_cast_fp16")]; tensor var_5119_perm_0 = const()[name = string("op_5119_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 1024, 1, 8])]; tensor var_5119_cast_fp16 = transpose(perm = var_5119_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_18")]; tensor x_311_cast_fp16 = reshape(shape = var_5124, x = var_5119_cast_fp16)[name = string("x_311_cast_fp16")]; string hidden_states_105_pad_type_0 = const()[name = string("hidden_states_105_pad_type_0"), val = string("valid")]; tensor hidden_states_105_strides_0 = const()[name = string("hidden_states_105_strides_0"), val = tensor([1, 1])]; tensor hidden_states_105_pad_0 = const()[name = string("hidden_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_105_dilations_0 = const()[name = string("hidden_states_105_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_105_groups_0 = const()[name = string("hidden_states_105_groups_0"), val = int32(1)]; tensor var_5131_to_fp16 = const()[name = string("op_5131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519211136)))]; tensor hidden_states_105_cast_fp16 = conv(dilations = hidden_states_105_dilations_0, groups = hidden_states_105_groups_0, pad = hidden_states_105_pad_0, pad_type = hidden_states_105_pad_type_0, strides = hidden_states_105_strides_0, weight = var_5131_to_fp16, x = x_311_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor x_313_cast_fp16 = add(x = x_305_cast_fp16, y = hidden_states_105_cast_fp16)[name = string("x_313_cast_fp16")]; int32 var_5143 = const()[name = string("op_5143"), val = int32(1)]; fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5146_cast_fp16 = mul(x = x_313_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_5146_cast_fp16")]; bool x_315_interleave_0 = const()[name = string("x_315_interleave_0"), val = bool(false)]; tensor x_315_cast_fp16 = concat(axis = var_5143, interleave = x_315_interleave_0, values = (x_313_cast_fp16, var_5146_cast_fp16))[name = string("x_315_cast_fp16")]; tensor out_211_axes_0 = const()[name = string("out_211_axes_0"), val = tensor([1])]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_211_cast_fp16 = layer_norm(axes = out_211_axes_0, epsilon = var_5156_to_fp16, x = x_315_cast_fp16)[name = string("out_211_cast_fp16")]; tensor layer_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521308352)))]; tensor out_213_cast_fp16 = mul(x = out_211_cast_fp16, y = layer_layers_17_post_attention_layernorm_weight_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_5162_split_sizes_0 = const()[name = string("op_5162_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5162_axis_0 = const()[name = string("op_5162_axis_0"), val = int32(1)]; tensor var_5162_cast_fp16_0, tensor var_5162_cast_fp16_1 = split(axis = var_5162_axis_0, split_sizes = var_5162_split_sizes_0, x = out_213_cast_fp16)[name = string("op_5162_cast_fp16")]; string input_35_pad_type_0 = const()[name = string("input_35_pad_type_0"), val = string("valid")]; tensor input_35_strides_0 = const()[name = string("input_35_strides_0"), val = tensor([1, 1])]; tensor input_35_pad_0 = const()[name = string("input_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_35_dilations_0 = const()[name = string("input_35_dilations_0"), val = tensor([1, 1])]; int32 input_35_groups_0 = const()[name = string("input_35_groups_0"), val = int32(1)]; tensor var_5167_to_fp16 = const()[name = string("op_5167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521312512)))]; tensor input_35_cast_fp16 = conv(dilations = input_35_dilations_0, groups = input_35_groups_0, pad = input_35_pad_0, pad_type = input_35_pad_type_0, strides = input_35_strides_0, weight = var_5167_to_fp16, x = var_5162_cast_fp16_0)[name = string("input_35_cast_fp16")]; tensor var_5178_cast_fp16 = silu(x = input_35_cast_fp16)[name = string("op_5178_cast_fp16")]; string var_5183_pad_type_0 = const()[name = string("op_5183_pad_type_0"), val = string("valid")]; tensor var_5183_strides_0 = const()[name = string("op_5183_strides_0"), val = tensor([1, 1])]; tensor var_5183_pad_0 = const()[name = string("op_5183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5183_dilations_0 = const()[name = string("op_5183_dilations_0"), val = tensor([1, 1])]; int32 var_5183_groups_0 = const()[name = string("op_5183_groups_0"), val = int32(1)]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529701184)))]; tensor var_5183_cast_fp16 = conv(dilations = var_5183_dilations_0, groups = var_5183_groups_0, pad = var_5183_pad_0, pad_type = var_5183_pad_type_0, strides = var_5183_strides_0, weight = var_5166_to_fp16, x = var_5162_cast_fp16_0)[name = string("op_5183_cast_fp16")]; tensor x_321_cast_fp16 = mul(x = var_5178_cast_fp16, y = var_5183_cast_fp16)[name = string("x_321_cast_fp16")]; string hidden_states_107_pad_type_0 = const()[name = string("hidden_states_107_pad_type_0"), val = string("valid")]; tensor hidden_states_107_strides_0 = const()[name = string("hidden_states_107_strides_0"), val = tensor([1, 1])]; tensor hidden_states_107_pad_0 = const()[name = string("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_107_dilations_0 = const()[name = string("hidden_states_107_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_107_groups_0 = const()[name = string("hidden_states_107_groups_0"), val = int32(1)]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538089856)))]; tensor hidden_states_107_cast_fp16 = conv(dilations = hidden_states_107_dilations_0, groups = hidden_states_107_groups_0, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = hidden_states_107_strides_0, weight = var_5165_to_fp16, x = x_321_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor x_323_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_107_cast_fp16)[name = string("x_323_cast_fp16")]; int32 var_5196 = const()[name = string("op_5196"), val = int32(1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5199_cast_fp16 = mul(x = x_323_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_5199_cast_fp16")]; bool x_325_interleave_0 = const()[name = string("x_325_interleave_0"), val = bool(false)]; tensor x_325_cast_fp16 = concat(axis = var_5196, interleave = x_325_interleave_0, values = (x_323_cast_fp16, var_5199_cast_fp16))[name = string("x_325_cast_fp16")]; tensor out_217_axes_0 = const()[name = string("out_217_axes_0"), val = tensor([1])]; fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_217_cast_fp16 = layer_norm(axes = out_217_axes_0, epsilon = var_5209_to_fp16, x = x_325_cast_fp16)[name = string("out_217_cast_fp16")]; tensor layer_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546478528)))]; tensor out_219_cast_fp16 = mul(x = out_217_cast_fp16, y = layer_layers_18_input_layernorm_weight_to_fp16)[name = string("out_219_cast_fp16")]; tensor var_5215_split_sizes_0 = const()[name = string("op_5215_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5215_axis_0 = const()[name = string("op_5215_axis_0"), val = int32(1)]; tensor var_5215_cast_fp16_0, tensor var_5215_cast_fp16_1 = split(axis = var_5215_axis_0, split_sizes = var_5215_split_sizes_0, x = out_219_cast_fp16)[name = string("op_5215_cast_fp16")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546482688)))]; tensor query_states_73_cast_fp16 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = var_5237_to_fp16, x = var_5215_cast_fp16_0)[name = string("query_states_73_cast_fp16")]; string key_states_73_pad_type_0 = const()[name = string("key_states_73_pad_type_0"), val = string("valid")]; tensor key_states_73_strides_0 = const()[name = string("key_states_73_strides_0"), val = tensor([1, 1])]; tensor key_states_73_pad_0 = const()[name = string("key_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_73_dilations_0 = const()[name = string("key_states_73_dilations_0"), val = tensor([1, 1])]; int32 key_states_73_groups_0 = const()[name = string("key_states_73_groups_0"), val = int32(1)]; tensor var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548579904)))]; tensor key_states_73_cast_fp16 = conv(dilations = key_states_73_dilations_0, groups = key_states_73_groups_0, pad = key_states_73_pad_0, pad_type = key_states_73_pad_type_0, strides = key_states_73_strides_0, weight = var_5248_to_fp16, x = var_5215_cast_fp16_0)[name = string("key_states_73_cast_fp16")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor var_5259_to_fp16 = const()[name = string("op_5259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548842112)))]; tensor value_states_73_cast_fp16 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = var_5259_to_fp16, x = var_5215_cast_fp16_0)[name = string("value_states_73_cast_fp16")]; tensor var_5267 = const()[name = string("op_5267"), val = tensor([1, 16, 64, 8])]; tensor embed_73_cast_fp16 = reshape(shape = var_5267, x = query_states_73_cast_fp16)[name = string("embed_73_cast_fp16")]; tensor var_5271 = const()[name = string("op_5271"), val = tensor([1, 2, 64, 8])]; tensor var_5272_cast_fp16 = reshape(shape = var_5271, x = key_states_73_cast_fp16)[name = string("op_5272_cast_fp16")]; tensor embed_75_perm_0 = const()[name = string("embed_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 64, 8])]; tensor var_5279_cast_fp16 = reshape(shape = var_5278, x = value_states_73_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor value_states_75_perm_0 = const()[name = string("value_states_75_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5283_cast_fp16 = mul(x = embed_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor var_5284_split_sizes_0 = const()[name = string("op_5284_split_sizes_0"), val = tensor([32, 32])]; int32 var_5284_axis_0 = const()[name = string("op_5284_axis_0"), val = int32(-2)]; tensor var_5284_cast_fp16_0, tensor var_5284_cast_fp16_1 = split(axis = var_5284_axis_0, split_sizes = var_5284_split_sizes_0, x = embed_73_cast_fp16)[name = string("op_5284_cast_fp16")]; fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5286_cast_fp16 = mul(x = var_5284_cast_fp16_1, y = const_187_promoted_to_fp16)[name = string("op_5286_cast_fp16")]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-2)]; bool var_5289_interleave_0 = const()[name = string("op_5289_interleave_0"), val = bool(false)]; tensor var_5289_cast_fp16 = concat(axis = var_5288, interleave = var_5289_interleave_0, values = (var_5286_cast_fp16, var_5284_cast_fp16_0))[name = string("op_5289_cast_fp16")]; tensor var_5290_cast_fp16 = mul(x = var_5289_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5290_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_5283_cast_fp16, y = var_5290_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor embed_75_cast_fp16 = transpose(perm = embed_75_perm_0, x = var_5272_cast_fp16)[name = string("transpose_17")]; tensor var_5293_cast_fp16 = mul(x = embed_75_cast_fp16, y = cos_cast_fp16)[name = string("op_5293_cast_fp16")]; tensor var_5294_split_sizes_0 = const()[name = string("op_5294_split_sizes_0"), val = tensor([32, 32])]; int32 var_5294_axis_0 = const()[name = string("op_5294_axis_0"), val = int32(-1)]; tensor var_5294_cast_fp16_0, tensor var_5294_cast_fp16_1 = split(axis = var_5294_axis_0, split_sizes = var_5294_split_sizes_0, x = embed_75_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5296_cast_fp16 = mul(x = var_5294_cast_fp16_1, y = const_188_promoted_to_fp16)[name = string("op_5296_cast_fp16")]; int32 var_5298 = const()[name = string("op_5298"), val = int32(-1)]; bool var_5299_interleave_0 = const()[name = string("op_5299_interleave_0"), val = bool(false)]; tensor var_5299_cast_fp16 = concat(axis = var_5298, interleave = var_5299_interleave_0, values = (var_5296_cast_fp16, var_5294_cast_fp16_0))[name = string("op_5299_cast_fp16")]; tensor var_5300_cast_fp16 = mul(x = var_5299_cast_fp16, y = sin_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor key_states_75_cast_fp16 = add(x = var_5293_cast_fp16, y = var_5300_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([18])]; tensor expand_dims_182 = const()[name = string("expand_dims_182"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([19])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_181, expand_dims_182, position_id, concat_147_values3_0))[name = string("concat_147")]; tensor concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = tensor([0])]; tensor concat_148_values3_0 = const()[name = string("concat_148_values3_0"), val = tensor([0])]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (expand_dims_184, concat_148_values1_0, var_426, concat_148_values3_0))[name = string("concat_148")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = key_states_75_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_250_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_250")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75_cast_fp16 = transpose(perm = value_states_75_perm_0, x = var_5279_cast_fp16)[name = string("transpose_16")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_147, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_148, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = value_states_75_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_251_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_251")]; tensor var_5343_begin_0 = const()[name = string("op_5343_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5343_end_0 = const()[name = string("op_5343_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5343_end_mask_0 = const()[name = string("op_5343_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5343_cast_fp16 = slice_by_index(begin = var_5343_begin_0, end = var_5343_end_0, end_mask = var_5343_end_mask_0, x = coreml_update_state_84)[name = string("op_5343_cast_fp16")]; tensor tile_36 = const()[name = string("tile_36"), val = tensor([1, 1])]; int32 var_5346_axis_0 = const()[name = string("op_5346_axis_0"), val = int32(1)]; tensor var_5346_cast_fp16_0, tensor var_5346_cast_fp16_1 = split(axis = var_5346_axis_0, split_sizes = tile_36, x = var_5343_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([19, 2, 2048, 64])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = coreml_update_state_85)[name = string("op_5353_cast_fp16")]; tensor tile_37 = const()[name = string("tile_37"), val = tensor([1, 1])]; int32 var_5356_axis_0 = const()[name = string("op_5356_axis_0"), val = int32(1)]; tensor var_5356_cast_fp16_0, tensor var_5356_cast_fp16_1 = split(axis = var_5356_axis_0, split_sizes = tile_37, x = var_5353_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_split_sizes_0 = const()[name = string("op_5359_split_sizes_0"), val = tensor([8, 8])]; int32 var_5359_axis_0 = const()[name = string("op_5359_axis_0"), val = int32(1)]; tensor var_5359_cast_fp16_0, tensor var_5359_cast_fp16_1 = split(axis = var_5359_axis_0, split_sizes = var_5359_split_sizes_0, x = query_states_75_cast_fp16)[name = string("op_5359_cast_fp16")]; bool attn_weights_289_transpose_x_0 = const()[name = string("attn_weights_289_transpose_x_0"), val = bool(false)]; bool attn_weights_289_transpose_y_0 = const()[name = string("attn_weights_289_transpose_y_0"), val = bool(false)]; tensor attn_weights_289_cast_fp16 = matmul(transpose_x = attn_weights_289_transpose_x_0, transpose_y = attn_weights_289_transpose_y_0, x = var_5346_cast_fp16_0, y = var_5359_cast_fp16_0)[name = string("attn_weights_289_cast_fp16")]; fp16 _inversed_attn_weights_291_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_291_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_291_cast_fp16 = mul(x = attn_weights_289_cast_fp16, y = _inversed_attn_weights_291_y_0_to_fp16)[name = string("_inversed_attn_weights_291_cast_fp16")]; tensor attn_weights_293_cast_fp16 = add(x = _inversed_attn_weights_291_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_293_cast_fp16")]; int32 var_5366 = const()[name = string("op_5366"), val = int32(2)]; tensor attn_weights_295_cast_fp16 = softmax(axis = var_5366, x = attn_weights_293_cast_fp16)[name = string("attn_weights_295_cast_fp16")]; bool var_5372_transpose_x_1 = const()[name = string("op_5372_transpose_x_1"), val = bool(true)]; bool var_5372_transpose_y_1 = const()[name = string("op_5372_transpose_y_1"), val = bool(false)]; tensor var_5372_cast_fp16 = matmul(transpose_x = var_5372_transpose_x_1, transpose_y = var_5372_transpose_y_1, x = attn_weights_295_cast_fp16, y = var_5356_cast_fp16_0)[name = string("op_5372_cast_fp16")]; bool attn_weights_297_transpose_x_0 = const()[name = string("attn_weights_297_transpose_x_0"), val = bool(false)]; bool attn_weights_297_transpose_y_0 = const()[name = string("attn_weights_297_transpose_y_0"), val = bool(false)]; tensor attn_weights_297_cast_fp16 = matmul(transpose_x = attn_weights_297_transpose_x_0, transpose_y = attn_weights_297_transpose_y_0, x = var_5346_cast_fp16_1, y = var_5359_cast_fp16_1)[name = string("attn_weights_297_cast_fp16")]; fp16 _inversed_attn_weights_299_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_299_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_299_cast_fp16 = mul(x = attn_weights_297_cast_fp16, y = _inversed_attn_weights_299_y_0_to_fp16)[name = string("_inversed_attn_weights_299_cast_fp16")]; tensor attn_weights_301_cast_fp16 = add(x = _inversed_attn_weights_299_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_301_cast_fp16")]; int32 var_5378 = const()[name = string("op_5378"), val = int32(2)]; tensor attn_weights_303_cast_fp16 = softmax(axis = var_5378, x = attn_weights_301_cast_fp16)[name = string("attn_weights_303_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(true)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_303_cast_fp16, y = var_5356_cast_fp16_1)[name = string("attn_output_109_cast_fp16")]; int32 var_5386 = const()[name = string("op_5386"), val = int32(1)]; bool attn_output_111_interleave_0 = const()[name = string("attn_output_111_interleave_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = concat(axis = var_5386, interleave = attn_output_111_interleave_0, values = (var_5372_cast_fp16, attn_output_109_cast_fp16))[name = string("attn_output_111_cast_fp16")]; tensor var_5390_perm_0 = const()[name = string("op_5390_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5395 = const()[name = string("op_5395"), val = tensor([1, 1024, 1, 8])]; tensor var_5390_cast_fp16 = transpose(perm = var_5390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_15")]; tensor x_329_cast_fp16 = reshape(shape = var_5395, x = var_5390_cast_fp16)[name = string("x_329_cast_fp16")]; string hidden_states_111_pad_type_0 = const()[name = string("hidden_states_111_pad_type_0"), val = string("valid")]; tensor hidden_states_111_strides_0 = const()[name = string("hidden_states_111_strides_0"), val = tensor([1, 1])]; tensor hidden_states_111_pad_0 = const()[name = string("hidden_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_111_dilations_0 = const()[name = string("hidden_states_111_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_111_groups_0 = const()[name = string("hidden_states_111_groups_0"), val = int32(1)]; tensor var_5402_to_fp16 = const()[name = string("op_5402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549104320)))]; tensor hidden_states_111_cast_fp16 = conv(dilations = hidden_states_111_dilations_0, groups = hidden_states_111_groups_0, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = hidden_states_111_strides_0, weight = var_5402_to_fp16, x = x_329_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_323_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("x_331_cast_fp16")]; int32 var_5414 = const()[name = string("op_5414"), val = int32(1)]; fp16 const_193_promoted_to_fp16 = const()[name = string("const_193_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5417_cast_fp16 = mul(x = x_331_cast_fp16, y = const_193_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool x_333_interleave_0 = const()[name = string("x_333_interleave_0"), val = bool(false)]; tensor x_333_cast_fp16 = concat(axis = var_5414, interleave = x_333_interleave_0, values = (x_331_cast_fp16, var_5417_cast_fp16))[name = string("x_333_cast_fp16")]; tensor out_223_axes_0 = const()[name = string("out_223_axes_0"), val = tensor([1])]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_223_cast_fp16 = layer_norm(axes = out_223_axes_0, epsilon = var_5427_to_fp16, x = x_333_cast_fp16)[name = string("out_223_cast_fp16")]; tensor layer_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551201536)))]; tensor out_225_cast_fp16 = mul(x = out_223_cast_fp16, y = layer_layers_18_post_attention_layernorm_weight_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(1)]; tensor var_5433_cast_fp16_0, tensor var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = out_225_cast_fp16)[name = string("op_5433_cast_fp16")]; string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551205696)))]; tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = var_5438_to_fp16, x = var_5433_cast_fp16_0)[name = string("input_37_cast_fp16")]; tensor var_5449_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_5449_cast_fp16")]; string var_5454_pad_type_0 = const()[name = string("op_5454_pad_type_0"), val = string("valid")]; tensor var_5454_strides_0 = const()[name = string("op_5454_strides_0"), val = tensor([1, 1])]; tensor var_5454_pad_0 = const()[name = string("op_5454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_dilations_0 = const()[name = string("op_5454_dilations_0"), val = tensor([1, 1])]; int32 var_5454_groups_0 = const()[name = string("op_5454_groups_0"), val = int32(1)]; tensor var_5437_to_fp16 = const()[name = string("op_5437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559594368)))]; tensor var_5454_cast_fp16 = conv(dilations = var_5454_dilations_0, groups = var_5454_groups_0, pad = var_5454_pad_0, pad_type = var_5454_pad_type_0, strides = var_5454_strides_0, weight = var_5437_to_fp16, x = var_5433_cast_fp16_0)[name = string("op_5454_cast_fp16")]; tensor x_339_cast_fp16 = mul(x = var_5449_cast_fp16, y = var_5454_cast_fp16)[name = string("x_339_cast_fp16")]; string hidden_states_113_pad_type_0 = const()[name = string("hidden_states_113_pad_type_0"), val = string("valid")]; tensor hidden_states_113_strides_0 = const()[name = string("hidden_states_113_strides_0"), val = tensor([1, 1])]; tensor hidden_states_113_pad_0 = const()[name = string("hidden_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_113_dilations_0 = const()[name = string("hidden_states_113_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_113_groups_0 = const()[name = string("hidden_states_113_groups_0"), val = int32(1)]; tensor var_5436_to_fp16 = const()[name = string("op_5436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567983040)))]; tensor hidden_states_113_cast_fp16 = conv(dilations = hidden_states_113_dilations_0, groups = hidden_states_113_groups_0, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = hidden_states_113_strides_0, weight = var_5436_to_fp16, x = x_339_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor x_341_cast_fp16 = add(x = x_331_cast_fp16, y = hidden_states_113_cast_fp16)[name = string("x_341_cast_fp16")]; int32 var_5467 = const()[name = string("op_5467"), val = int32(1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5470_cast_fp16 = mul(x = x_341_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_5470_cast_fp16")]; bool x_343_interleave_0 = const()[name = string("x_343_interleave_0"), val = bool(false)]; tensor x_343_cast_fp16 = concat(axis = var_5467, interleave = x_343_interleave_0, values = (x_341_cast_fp16, var_5470_cast_fp16))[name = string("x_343_cast_fp16")]; tensor out_229_axes_0 = const()[name = string("out_229_axes_0"), val = tensor([1])]; fp16 var_5480_to_fp16 = const()[name = string("op_5480_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_229_cast_fp16 = layer_norm(axes = out_229_axes_0, epsilon = var_5480_to_fp16, x = x_343_cast_fp16)[name = string("out_229_cast_fp16")]; tensor layer_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576371712)))]; tensor out_231_cast_fp16 = mul(x = out_229_cast_fp16, y = layer_layers_19_input_layernorm_weight_to_fp16)[name = string("out_231_cast_fp16")]; tensor var_5486_split_sizes_0 = const()[name = string("op_5486_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5486_axis_0 = const()[name = string("op_5486_axis_0"), val = int32(1)]; tensor var_5486_cast_fp16_0, tensor var_5486_cast_fp16_1 = split(axis = var_5486_axis_0, split_sizes = var_5486_split_sizes_0, x = out_231_cast_fp16)[name = string("op_5486_cast_fp16")]; string query_states_77_pad_type_0 = const()[name = string("query_states_77_pad_type_0"), val = string("valid")]; tensor query_states_77_strides_0 = const()[name = string("query_states_77_strides_0"), val = tensor([1, 1])]; tensor query_states_77_pad_0 = const()[name = string("query_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_77_dilations_0 = const()[name = string("query_states_77_dilations_0"), val = tensor([1, 1])]; int32 query_states_77_groups_0 = const()[name = string("query_states_77_groups_0"), val = int32(1)]; tensor var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576375872)))]; tensor query_states_77_cast_fp16 = conv(dilations = query_states_77_dilations_0, groups = query_states_77_groups_0, pad = query_states_77_pad_0, pad_type = query_states_77_pad_type_0, strides = query_states_77_strides_0, weight = var_5508_to_fp16, x = var_5486_cast_fp16_0)[name = string("query_states_77_cast_fp16")]; string key_states_77_pad_type_0 = const()[name = string("key_states_77_pad_type_0"), val = string("valid")]; tensor key_states_77_strides_0 = const()[name = string("key_states_77_strides_0"), val = tensor([1, 1])]; tensor key_states_77_pad_0 = const()[name = string("key_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_77_dilations_0 = const()[name = string("key_states_77_dilations_0"), val = tensor([1, 1])]; int32 key_states_77_groups_0 = const()[name = string("key_states_77_groups_0"), val = int32(1)]; tensor var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578473088)))]; tensor key_states_77_cast_fp16 = conv(dilations = key_states_77_dilations_0, groups = key_states_77_groups_0, pad = key_states_77_pad_0, pad_type = key_states_77_pad_type_0, strides = key_states_77_strides_0, weight = var_5519_to_fp16, x = var_5486_cast_fp16_0)[name = string("key_states_77_cast_fp16")]; string value_states_77_pad_type_0 = const()[name = string("value_states_77_pad_type_0"), val = string("valid")]; tensor value_states_77_strides_0 = const()[name = string("value_states_77_strides_0"), val = tensor([1, 1])]; tensor value_states_77_pad_0 = const()[name = string("value_states_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_77_dilations_0 = const()[name = string("value_states_77_dilations_0"), val = tensor([1, 1])]; int32 value_states_77_groups_0 = const()[name = string("value_states_77_groups_0"), val = int32(1)]; tensor var_5530_to_fp16 = const()[name = string("op_5530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578735296)))]; tensor value_states_77_cast_fp16 = conv(dilations = value_states_77_dilations_0, groups = value_states_77_groups_0, pad = value_states_77_pad_0, pad_type = value_states_77_pad_type_0, strides = value_states_77_strides_0, weight = var_5530_to_fp16, x = var_5486_cast_fp16_0)[name = string("value_states_77_cast_fp16")]; tensor var_5538 = const()[name = string("op_5538"), val = tensor([1, 16, 64, 8])]; tensor embed_77_cast_fp16 = reshape(shape = var_5538, x = query_states_77_cast_fp16)[name = string("embed_77_cast_fp16")]; tensor var_5542 = const()[name = string("op_5542"), val = tensor([1, 2, 64, 8])]; tensor var_5543_cast_fp16 = reshape(shape = var_5542, x = key_states_77_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor embed_79_perm_0 = const()[name = string("embed_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([1, 2, 64, 8])]; tensor var_5550_cast_fp16 = reshape(shape = var_5549, x = value_states_77_cast_fp16)[name = string("op_5550_cast_fp16")]; tensor value_states_79_perm_0 = const()[name = string("value_states_79_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5554_cast_fp16 = mul(x = embed_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5554_cast_fp16")]; tensor var_5555_split_sizes_0 = const()[name = string("op_5555_split_sizes_0"), val = tensor([32, 32])]; int32 var_5555_axis_0 = const()[name = string("op_5555_axis_0"), val = int32(-2)]; tensor var_5555_cast_fp16_0, tensor var_5555_cast_fp16_1 = split(axis = var_5555_axis_0, split_sizes = var_5555_split_sizes_0, x = embed_77_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 const_197_promoted_to_fp16 = const()[name = string("const_197_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5557_cast_fp16 = mul(x = var_5555_cast_fp16_1, y = const_197_promoted_to_fp16)[name = string("op_5557_cast_fp16")]; int32 var_5559 = const()[name = string("op_5559"), val = int32(-2)]; bool var_5560_interleave_0 = const()[name = string("op_5560_interleave_0"), val = bool(false)]; tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5557_cast_fp16, var_5555_cast_fp16_0))[name = string("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = mul(x = var_5560_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5561_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_5554_cast_fp16, y = var_5561_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor embed_79_cast_fp16 = transpose(perm = embed_79_perm_0, x = var_5543_cast_fp16)[name = string("transpose_14")]; tensor var_5564_cast_fp16 = mul(x = embed_79_cast_fp16, y = cos_cast_fp16)[name = string("op_5564_cast_fp16")]; tensor var_5565_split_sizes_0 = const()[name = string("op_5565_split_sizes_0"), val = tensor([32, 32])]; int32 var_5565_axis_0 = const()[name = string("op_5565_axis_0"), val = int32(-1)]; tensor var_5565_cast_fp16_0, tensor var_5565_cast_fp16_1 = split(axis = var_5565_axis_0, split_sizes = var_5565_split_sizes_0, x = embed_79_cast_fp16)[name = string("op_5565_cast_fp16")]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5567_cast_fp16 = mul(x = var_5565_cast_fp16_1, y = const_198_promoted_to_fp16)[name = string("op_5567_cast_fp16")]; int32 var_5569 = const()[name = string("op_5569"), val = int32(-1)]; bool var_5570_interleave_0 = const()[name = string("op_5570_interleave_0"), val = bool(false)]; tensor var_5570_cast_fp16 = concat(axis = var_5569, interleave = var_5570_interleave_0, values = (var_5567_cast_fp16, var_5565_cast_fp16_0))[name = string("op_5570_cast_fp16")]; tensor var_5571_cast_fp16 = mul(x = var_5570_cast_fp16, y = sin_cast_fp16)[name = string("op_5571_cast_fp16")]; tensor key_states_79_cast_fp16 = add(x = var_5564_cast_fp16, y = var_5571_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor expand_dims_191 = const()[name = string("expand_dims_191"), val = tensor([19])]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([20])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_191, expand_dims_192, position_id, concat_155_values3_0))[name = string("concat_155")]; tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (expand_dims_194, concat_156_values1_0, var_426, concat_156_values3_0))[name = string("concat_156")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = key_states_79_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_252_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_252")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_79_cast_fp16 = transpose(perm = value_states_79_perm_0, x = var_5550_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_156, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = value_states_79_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_253_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_253")]; tensor var_5614_begin_0 = const()[name = string("op_5614_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5614_end_0 = const()[name = string("op_5614_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5614_end_mask_0 = const()[name = string("op_5614_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5614_cast_fp16 = slice_by_index(begin = var_5614_begin_0, end = var_5614_end_0, end_mask = var_5614_end_mask_0, x = coreml_update_state_86)[name = string("op_5614_cast_fp16")]; tensor tile_38 = const()[name = string("tile_38"), val = tensor([1, 1])]; int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(1)]; tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = tile_38, x = var_5614_cast_fp16)[name = string("op_5617_cast_fp16")]; tensor var_5624_begin_0 = const()[name = string("op_5624_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_5624_end_0 = const()[name = string("op_5624_end_0"), val = tensor([20, 2, 2048, 64])]; tensor var_5624_end_mask_0 = const()[name = string("op_5624_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = coreml_update_state_87)[name = string("op_5624_cast_fp16")]; tensor tile_39 = const()[name = string("tile_39"), val = tensor([1, 1])]; int32 var_5627_axis_0 = const()[name = string("op_5627_axis_0"), val = int32(1)]; tensor var_5627_cast_fp16_0, tensor var_5627_cast_fp16_1 = split(axis = var_5627_axis_0, split_sizes = tile_39, x = var_5624_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5630_split_sizes_0 = const()[name = string("op_5630_split_sizes_0"), val = tensor([8, 8])]; int32 var_5630_axis_0 = const()[name = string("op_5630_axis_0"), val = int32(1)]; tensor var_5630_cast_fp16_0, tensor var_5630_cast_fp16_1 = split(axis = var_5630_axis_0, split_sizes = var_5630_split_sizes_0, x = query_states_79_cast_fp16)[name = string("op_5630_cast_fp16")]; bool attn_weights_305_transpose_x_0 = const()[name = string("attn_weights_305_transpose_x_0"), val = bool(false)]; bool attn_weights_305_transpose_y_0 = const()[name = string("attn_weights_305_transpose_y_0"), val = bool(false)]; tensor attn_weights_305_cast_fp16 = matmul(transpose_x = attn_weights_305_transpose_x_0, transpose_y = attn_weights_305_transpose_y_0, x = var_5617_cast_fp16_0, y = var_5630_cast_fp16_0)[name = string("attn_weights_305_cast_fp16")]; fp16 _inversed_attn_weights_307_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_307_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_307_cast_fp16 = mul(x = attn_weights_305_cast_fp16, y = _inversed_attn_weights_307_y_0_to_fp16)[name = string("_inversed_attn_weights_307_cast_fp16")]; tensor attn_weights_309_cast_fp16 = add(x = _inversed_attn_weights_307_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_309_cast_fp16")]; int32 var_5637 = const()[name = string("op_5637"), val = int32(2)]; tensor attn_weights_311_cast_fp16 = softmax(axis = var_5637, x = attn_weights_309_cast_fp16)[name = string("attn_weights_311_cast_fp16")]; bool var_5643_transpose_x_1 = const()[name = string("op_5643_transpose_x_1"), val = bool(true)]; bool var_5643_transpose_y_1 = const()[name = string("op_5643_transpose_y_1"), val = bool(false)]; tensor var_5643_cast_fp16 = matmul(transpose_x = var_5643_transpose_x_1, transpose_y = var_5643_transpose_y_1, x = attn_weights_311_cast_fp16, y = var_5627_cast_fp16_0)[name = string("op_5643_cast_fp16")]; bool attn_weights_313_transpose_x_0 = const()[name = string("attn_weights_313_transpose_x_0"), val = bool(false)]; bool attn_weights_313_transpose_y_0 = const()[name = string("attn_weights_313_transpose_y_0"), val = bool(false)]; tensor attn_weights_313_cast_fp16 = matmul(transpose_x = attn_weights_313_transpose_x_0, transpose_y = attn_weights_313_transpose_y_0, x = var_5617_cast_fp16_1, y = var_5630_cast_fp16_1)[name = string("attn_weights_313_cast_fp16")]; fp16 _inversed_attn_weights_315_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_315_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_315_cast_fp16 = mul(x = attn_weights_313_cast_fp16, y = _inversed_attn_weights_315_y_0_to_fp16)[name = string("_inversed_attn_weights_315_cast_fp16")]; tensor attn_weights_317_cast_fp16 = add(x = _inversed_attn_weights_315_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_317_cast_fp16")]; int32 var_5649 = const()[name = string("op_5649"), val = int32(2)]; tensor attn_weights_319_cast_fp16 = softmax(axis = var_5649, x = attn_weights_317_cast_fp16)[name = string("attn_weights_319_cast_fp16")]; bool attn_output_115_transpose_x_1 = const()[name = string("attn_output_115_transpose_x_1"), val = bool(true)]; bool attn_output_115_transpose_y_1 = const()[name = string("attn_output_115_transpose_y_1"), val = bool(false)]; tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_1, transpose_y = attn_output_115_transpose_y_1, x = attn_weights_319_cast_fp16, y = var_5627_cast_fp16_1)[name = string("attn_output_115_cast_fp16")]; int32 var_5657 = const()[name = string("op_5657"), val = int32(1)]; bool attn_output_117_interleave_0 = const()[name = string("attn_output_117_interleave_0"), val = bool(false)]; tensor attn_output_117_cast_fp16 = concat(axis = var_5657, interleave = attn_output_117_interleave_0, values = (var_5643_cast_fp16, attn_output_115_cast_fp16))[name = string("attn_output_117_cast_fp16")]; tensor var_5661_perm_0 = const()[name = string("op_5661_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 1024, 1, 8])]; tensor var_5661_cast_fp16 = transpose(perm = var_5661_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_12")]; tensor x_347_cast_fp16 = reshape(shape = var_5666, x = var_5661_cast_fp16)[name = string("x_347_cast_fp16")]; string hidden_states_117_pad_type_0 = const()[name = string("hidden_states_117_pad_type_0"), val = string("valid")]; tensor hidden_states_117_strides_0 = const()[name = string("hidden_states_117_strides_0"), val = tensor([1, 1])]; tensor hidden_states_117_pad_0 = const()[name = string("hidden_states_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_117_dilations_0 = const()[name = string("hidden_states_117_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_117_groups_0 = const()[name = string("hidden_states_117_groups_0"), val = int32(1)]; tensor var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(578997504)))]; tensor hidden_states_117_cast_fp16 = conv(dilations = hidden_states_117_dilations_0, groups = hidden_states_117_groups_0, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = hidden_states_117_strides_0, weight = var_5673_to_fp16, x = x_347_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor x_349_cast_fp16 = add(x = x_341_cast_fp16, y = hidden_states_117_cast_fp16)[name = string("x_349_cast_fp16")]; int32 var_5685 = const()[name = string("op_5685"), val = int32(1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5688_cast_fp16 = mul(x = x_349_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_5688_cast_fp16")]; bool x_351_interleave_0 = const()[name = string("x_351_interleave_0"), val = bool(false)]; tensor x_351_cast_fp16 = concat(axis = var_5685, interleave = x_351_interleave_0, values = (x_349_cast_fp16, var_5688_cast_fp16))[name = string("x_351_cast_fp16")]; tensor out_235_axes_0 = const()[name = string("out_235_axes_0"), val = tensor([1])]; fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_235_cast_fp16 = layer_norm(axes = out_235_axes_0, epsilon = var_5698_to_fp16, x = x_351_cast_fp16)[name = string("out_235_cast_fp16")]; tensor layer_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581094720)))]; tensor out_237_cast_fp16 = mul(x = out_235_cast_fp16, y = layer_layers_19_post_attention_layernorm_weight_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(1)]; tensor var_5704_cast_fp16_0, tensor var_5704_cast_fp16_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = out_237_cast_fp16)[name = string("op_5704_cast_fp16")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor var_5709_to_fp16 = const()[name = string("op_5709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581098880)))]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = var_5709_to_fp16, x = var_5704_cast_fp16_0)[name = string("input_39_cast_fp16")]; tensor var_5720_cast_fp16 = silu(x = input_39_cast_fp16)[name = string("op_5720_cast_fp16")]; string var_5725_pad_type_0 = const()[name = string("op_5725_pad_type_0"), val = string("valid")]; tensor var_5725_strides_0 = const()[name = string("op_5725_strides_0"), val = tensor([1, 1])]; tensor var_5725_pad_0 = const()[name = string("op_5725_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5725_dilations_0 = const()[name = string("op_5725_dilations_0"), val = tensor([1, 1])]; int32 var_5725_groups_0 = const()[name = string("op_5725_groups_0"), val = int32(1)]; tensor var_5708_to_fp16 = const()[name = string("op_5708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589487552)))]; tensor var_5725_cast_fp16 = conv(dilations = var_5725_dilations_0, groups = var_5725_groups_0, pad = var_5725_pad_0, pad_type = var_5725_pad_type_0, strides = var_5725_strides_0, weight = var_5708_to_fp16, x = var_5704_cast_fp16_0)[name = string("op_5725_cast_fp16")]; tensor x_357_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5725_cast_fp16)[name = string("x_357_cast_fp16")]; string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; tensor var_5707_to_fp16 = const()[name = string("op_5707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597876224)))]; tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = var_5707_to_fp16, x = x_357_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor x_359_cast_fp16 = add(x = x_349_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("x_359_cast_fp16")]; int32 var_5738 = const()[name = string("op_5738"), val = int32(1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5741_cast_fp16 = mul(x = x_359_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; bool x_361_interleave_0 = const()[name = string("x_361_interleave_0"), val = bool(false)]; tensor x_361_cast_fp16 = concat(axis = var_5738, interleave = x_361_interleave_0, values = (x_359_cast_fp16, var_5741_cast_fp16))[name = string("x_361_cast_fp16")]; tensor out_241_axes_0 = const()[name = string("out_241_axes_0"), val = tensor([1])]; fp16 var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_241_cast_fp16 = layer_norm(axes = out_241_axes_0, epsilon = var_5751_to_fp16, x = x_361_cast_fp16)[name = string("out_241_cast_fp16")]; tensor layer_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606264896)))]; tensor out_243_cast_fp16 = mul(x = out_241_cast_fp16, y = layer_layers_20_input_layernorm_weight_to_fp16)[name = string("out_243_cast_fp16")]; tensor var_5757_split_sizes_0 = const()[name = string("op_5757_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5757_axis_0 = const()[name = string("op_5757_axis_0"), val = int32(1)]; tensor var_5757_cast_fp16_0, tensor var_5757_cast_fp16_1 = split(axis = var_5757_axis_0, split_sizes = var_5757_split_sizes_0, x = out_243_cast_fp16)[name = string("op_5757_cast_fp16")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor var_5779_to_fp16 = const()[name = string("op_5779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606269056)))]; tensor query_states_81_cast_fp16 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = var_5779_to_fp16, x = var_5757_cast_fp16_0)[name = string("query_states_81_cast_fp16")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor var_5790_to_fp16 = const()[name = string("op_5790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608366272)))]; tensor key_states_81_cast_fp16 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = var_5790_to_fp16, x = var_5757_cast_fp16_0)[name = string("key_states_81_cast_fp16")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608628480)))]; tensor value_states_81_cast_fp16 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = var_5801_to_fp16, x = var_5757_cast_fp16_0)[name = string("value_states_81_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 16, 64, 8])]; tensor embed_81_cast_fp16 = reshape(shape = var_5809, x = query_states_81_cast_fp16)[name = string("embed_81_cast_fp16")]; tensor var_5813 = const()[name = string("op_5813"), val = tensor([1, 2, 64, 8])]; tensor var_5814_cast_fp16 = reshape(shape = var_5813, x = key_states_81_cast_fp16)[name = string("op_5814_cast_fp16")]; tensor embed_83_perm_0 = const()[name = string("embed_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, 2, 64, 8])]; tensor var_5821_cast_fp16 = reshape(shape = var_5820, x = value_states_81_cast_fp16)[name = string("op_5821_cast_fp16")]; tensor value_states_83_perm_0 = const()[name = string("value_states_83_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5825_cast_fp16 = mul(x = embed_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5825_cast_fp16")]; tensor var_5826_split_sizes_0 = const()[name = string("op_5826_split_sizes_0"), val = tensor([32, 32])]; int32 var_5826_axis_0 = const()[name = string("op_5826_axis_0"), val = int32(-2)]; tensor var_5826_cast_fp16_0, tensor var_5826_cast_fp16_1 = split(axis = var_5826_axis_0, split_sizes = var_5826_split_sizes_0, x = embed_81_cast_fp16)[name = string("op_5826_cast_fp16")]; fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5828_cast_fp16 = mul(x = var_5826_cast_fp16_1, y = const_207_promoted_to_fp16)[name = string("op_5828_cast_fp16")]; int32 var_5830 = const()[name = string("op_5830"), val = int32(-2)]; bool var_5831_interleave_0 = const()[name = string("op_5831_interleave_0"), val = bool(false)]; tensor var_5831_cast_fp16 = concat(axis = var_5830, interleave = var_5831_interleave_0, values = (var_5828_cast_fp16, var_5826_cast_fp16_0))[name = string("op_5831_cast_fp16")]; tensor var_5832_cast_fp16 = mul(x = var_5831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5832_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_5825_cast_fp16, y = var_5832_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor embed_83_cast_fp16 = transpose(perm = embed_83_perm_0, x = var_5814_cast_fp16)[name = string("transpose_11")]; tensor var_5835_cast_fp16 = mul(x = embed_83_cast_fp16, y = cos_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5836_split_sizes_0 = const()[name = string("op_5836_split_sizes_0"), val = tensor([32, 32])]; int32 var_5836_axis_0 = const()[name = string("op_5836_axis_0"), val = int32(-1)]; tensor var_5836_cast_fp16_0, tensor var_5836_cast_fp16_1 = split(axis = var_5836_axis_0, split_sizes = var_5836_split_sizes_0, x = embed_83_cast_fp16)[name = string("op_5836_cast_fp16")]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5838_cast_fp16 = mul(x = var_5836_cast_fp16_1, y = const_208_promoted_to_fp16)[name = string("op_5838_cast_fp16")]; int32 var_5840 = const()[name = string("op_5840"), val = int32(-1)]; bool var_5841_interleave_0 = const()[name = string("op_5841_interleave_0"), val = bool(false)]; tensor var_5841_cast_fp16 = concat(axis = var_5840, interleave = var_5841_interleave_0, values = (var_5838_cast_fp16, var_5836_cast_fp16_0))[name = string("op_5841_cast_fp16")]; tensor var_5842_cast_fp16 = mul(x = var_5841_cast_fp16, y = sin_cast_fp16)[name = string("op_5842_cast_fp16")]; tensor key_states_83_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5842_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([20])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([0])]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([21])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_201, expand_dims_202, position_id, concat_163_values3_0))[name = string("concat_163")]; tensor concat_164_values1_0 = const()[name = string("concat_164_values1_0"), val = tensor([0])]; tensor concat_164_values3_0 = const()[name = string("concat_164_values3_0"), val = tensor([0])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_204, concat_164_values1_0, var_426, concat_164_values3_0))[name = string("concat_164")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_254_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_254")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83_cast_fp16 = transpose(perm = value_states_83_perm_0, x = var_5821_cast_fp16)[name = string("transpose_10")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_163, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_164, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = value_states_83_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_255_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_255")]; tensor var_5885_begin_0 = const()[name = string("op_5885_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5885_end_0 = const()[name = string("op_5885_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5885_end_mask_0 = const()[name = string("op_5885_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5885_cast_fp16 = slice_by_index(begin = var_5885_begin_0, end = var_5885_end_0, end_mask = var_5885_end_mask_0, x = coreml_update_state_88)[name = string("op_5885_cast_fp16")]; tensor tile_40 = const()[name = string("tile_40"), val = tensor([1, 1])]; int32 var_5888_axis_0 = const()[name = string("op_5888_axis_0"), val = int32(1)]; tensor var_5888_cast_fp16_0, tensor var_5888_cast_fp16_1 = split(axis = var_5888_axis_0, split_sizes = tile_40, x = var_5885_cast_fp16)[name = string("op_5888_cast_fp16")]; tensor var_5895_begin_0 = const()[name = string("op_5895_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_5895_end_0 = const()[name = string("op_5895_end_0"), val = tensor([21, 2, 2048, 64])]; tensor var_5895_end_mask_0 = const()[name = string("op_5895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = coreml_update_state_89)[name = string("op_5895_cast_fp16")]; tensor tile_41 = const()[name = string("tile_41"), val = tensor([1, 1])]; int32 var_5898_axis_0 = const()[name = string("op_5898_axis_0"), val = int32(1)]; tensor var_5898_cast_fp16_0, tensor var_5898_cast_fp16_1 = split(axis = var_5898_axis_0, split_sizes = tile_41, x = var_5895_cast_fp16)[name = string("op_5898_cast_fp16")]; tensor var_5901_split_sizes_0 = const()[name = string("op_5901_split_sizes_0"), val = tensor([8, 8])]; int32 var_5901_axis_0 = const()[name = string("op_5901_axis_0"), val = int32(1)]; tensor var_5901_cast_fp16_0, tensor var_5901_cast_fp16_1 = split(axis = var_5901_axis_0, split_sizes = var_5901_split_sizes_0, x = query_states_83_cast_fp16)[name = string("op_5901_cast_fp16")]; bool attn_weights_321_transpose_x_0 = const()[name = string("attn_weights_321_transpose_x_0"), val = bool(false)]; bool attn_weights_321_transpose_y_0 = const()[name = string("attn_weights_321_transpose_y_0"), val = bool(false)]; tensor attn_weights_321_cast_fp16 = matmul(transpose_x = attn_weights_321_transpose_x_0, transpose_y = attn_weights_321_transpose_y_0, x = var_5888_cast_fp16_0, y = var_5901_cast_fp16_0)[name = string("attn_weights_321_cast_fp16")]; fp16 _inversed_attn_weights_323_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_323_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_323_cast_fp16 = mul(x = attn_weights_321_cast_fp16, y = _inversed_attn_weights_323_y_0_to_fp16)[name = string("_inversed_attn_weights_323_cast_fp16")]; tensor attn_weights_325_cast_fp16 = add(x = _inversed_attn_weights_323_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_325_cast_fp16")]; int32 var_5908 = const()[name = string("op_5908"), val = int32(2)]; tensor attn_weights_327_cast_fp16 = softmax(axis = var_5908, x = attn_weights_325_cast_fp16)[name = string("attn_weights_327_cast_fp16")]; bool var_5914_transpose_x_1 = const()[name = string("op_5914_transpose_x_1"), val = bool(true)]; bool var_5914_transpose_y_1 = const()[name = string("op_5914_transpose_y_1"), val = bool(false)]; tensor var_5914_cast_fp16 = matmul(transpose_x = var_5914_transpose_x_1, transpose_y = var_5914_transpose_y_1, x = attn_weights_327_cast_fp16, y = var_5898_cast_fp16_0)[name = string("op_5914_cast_fp16")]; bool attn_weights_329_transpose_x_0 = const()[name = string("attn_weights_329_transpose_x_0"), val = bool(false)]; bool attn_weights_329_transpose_y_0 = const()[name = string("attn_weights_329_transpose_y_0"), val = bool(false)]; tensor attn_weights_329_cast_fp16 = matmul(transpose_x = attn_weights_329_transpose_x_0, transpose_y = attn_weights_329_transpose_y_0, x = var_5888_cast_fp16_1, y = var_5901_cast_fp16_1)[name = string("attn_weights_329_cast_fp16")]; fp16 _inversed_attn_weights_331_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_331_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_331_cast_fp16 = mul(x = attn_weights_329_cast_fp16, y = _inversed_attn_weights_331_y_0_to_fp16)[name = string("_inversed_attn_weights_331_cast_fp16")]; tensor attn_weights_333_cast_fp16 = add(x = _inversed_attn_weights_331_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_333_cast_fp16")]; int32 var_5920 = const()[name = string("op_5920"), val = int32(2)]; tensor attn_weights_335_cast_fp16 = softmax(axis = var_5920, x = attn_weights_333_cast_fp16)[name = string("attn_weights_335_cast_fp16")]; bool attn_output_121_transpose_x_1 = const()[name = string("attn_output_121_transpose_x_1"), val = bool(true)]; bool attn_output_121_transpose_y_1 = const()[name = string("attn_output_121_transpose_y_1"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_1, transpose_y = attn_output_121_transpose_y_1, x = attn_weights_335_cast_fp16, y = var_5898_cast_fp16_1)[name = string("attn_output_121_cast_fp16")]; int32 var_5928 = const()[name = string("op_5928"), val = int32(1)]; bool attn_output_123_interleave_0 = const()[name = string("attn_output_123_interleave_0"), val = bool(false)]; tensor attn_output_123_cast_fp16 = concat(axis = var_5928, interleave = attn_output_123_interleave_0, values = (var_5914_cast_fp16, attn_output_121_cast_fp16))[name = string("attn_output_123_cast_fp16")]; tensor var_5932_perm_0 = const()[name = string("op_5932_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_5937 = const()[name = string("op_5937"), val = tensor([1, 1024, 1, 8])]; tensor var_5932_cast_fp16 = transpose(perm = var_5932_perm_0, x = attn_output_123_cast_fp16)[name = string("transpose_9")]; tensor x_365_cast_fp16 = reshape(shape = var_5937, x = var_5932_cast_fp16)[name = string("x_365_cast_fp16")]; string hidden_states_123_pad_type_0 = const()[name = string("hidden_states_123_pad_type_0"), val = string("valid")]; tensor hidden_states_123_strides_0 = const()[name = string("hidden_states_123_strides_0"), val = tensor([1, 1])]; tensor hidden_states_123_pad_0 = const()[name = string("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_123_dilations_0 = const()[name = string("hidden_states_123_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_123_groups_0 = const()[name = string("hidden_states_123_groups_0"), val = int32(1)]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(608890688)))]; tensor hidden_states_123_cast_fp16 = conv(dilations = hidden_states_123_dilations_0, groups = hidden_states_123_groups_0, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = hidden_states_123_strides_0, weight = var_5944_to_fp16, x = x_365_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_359_cast_fp16, y = hidden_states_123_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(1)]; fp16 const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5959_cast_fp16 = mul(x = x_367_cast_fp16, y = const_213_promoted_to_fp16)[name = string("op_5959_cast_fp16")]; bool x_369_interleave_0 = const()[name = string("x_369_interleave_0"), val = bool(false)]; tensor x_369_cast_fp16 = concat(axis = var_5956, interleave = x_369_interleave_0, values = (x_367_cast_fp16, var_5959_cast_fp16))[name = string("x_369_cast_fp16")]; tensor out_247_axes_0 = const()[name = string("out_247_axes_0"), val = tensor([1])]; fp16 var_5969_to_fp16 = const()[name = string("op_5969_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_247_cast_fp16 = layer_norm(axes = out_247_axes_0, epsilon = var_5969_to_fp16, x = x_369_cast_fp16)[name = string("out_247_cast_fp16")]; tensor layer_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610987904)))]; tensor out_249_cast_fp16 = mul(x = out_247_cast_fp16, y = layer_layers_20_post_attention_layernorm_weight_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5975_split_sizes_0 = const()[name = string("op_5975_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_5975_axis_0 = const()[name = string("op_5975_axis_0"), val = int32(1)]; tensor var_5975_cast_fp16_0, tensor var_5975_cast_fp16_1 = split(axis = var_5975_axis_0, split_sizes = var_5975_split_sizes_0, x = out_249_cast_fp16)[name = string("op_5975_cast_fp16")]; string input_41_pad_type_0 = const()[name = string("input_41_pad_type_0"), val = string("valid")]; tensor input_41_strides_0 = const()[name = string("input_41_strides_0"), val = tensor([1, 1])]; tensor input_41_pad_0 = const()[name = string("input_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_41_dilations_0 = const()[name = string("input_41_dilations_0"), val = tensor([1, 1])]; int32 input_41_groups_0 = const()[name = string("input_41_groups_0"), val = int32(1)]; tensor var_5980_to_fp16 = const()[name = string("op_5980_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610992064)))]; tensor input_41_cast_fp16 = conv(dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = var_5980_to_fp16, x = var_5975_cast_fp16_0)[name = string("input_41_cast_fp16")]; tensor var_5991_cast_fp16 = silu(x = input_41_cast_fp16)[name = string("op_5991_cast_fp16")]; string var_5996_pad_type_0 = const()[name = string("op_5996_pad_type_0"), val = string("valid")]; tensor var_5996_strides_0 = const()[name = string("op_5996_strides_0"), val = tensor([1, 1])]; tensor var_5996_pad_0 = const()[name = string("op_5996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5996_dilations_0 = const()[name = string("op_5996_dilations_0"), val = tensor([1, 1])]; int32 var_5996_groups_0 = const()[name = string("op_5996_groups_0"), val = int32(1)]; tensor var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619380736)))]; tensor var_5996_cast_fp16 = conv(dilations = var_5996_dilations_0, groups = var_5996_groups_0, pad = var_5996_pad_0, pad_type = var_5996_pad_type_0, strides = var_5996_strides_0, weight = var_5979_to_fp16, x = var_5975_cast_fp16_0)[name = string("op_5996_cast_fp16")]; tensor x_375_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5996_cast_fp16)[name = string("x_375_cast_fp16")]; string hidden_states_125_pad_type_0 = const()[name = string("hidden_states_125_pad_type_0"), val = string("valid")]; tensor hidden_states_125_strides_0 = const()[name = string("hidden_states_125_strides_0"), val = tensor([1, 1])]; tensor hidden_states_125_pad_0 = const()[name = string("hidden_states_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_125_dilations_0 = const()[name = string("hidden_states_125_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_125_groups_0 = const()[name = string("hidden_states_125_groups_0"), val = int32(1)]; tensor var_5978_to_fp16 = const()[name = string("op_5978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627769408)))]; tensor hidden_states_125_cast_fp16 = conv(dilations = hidden_states_125_dilations_0, groups = hidden_states_125_groups_0, pad = hidden_states_125_pad_0, pad_type = hidden_states_125_pad_type_0, strides = hidden_states_125_strides_0, weight = var_5978_to_fp16, x = x_375_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = hidden_states_125_cast_fp16)[name = string("x_377_cast_fp16")]; int32 var_6009 = const()[name = string("op_6009"), val = int32(1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6012_cast_fp16 = mul(x = x_377_cast_fp16, y = const_214_promoted_to_fp16)[name = string("op_6012_cast_fp16")]; bool x_379_interleave_0 = const()[name = string("x_379_interleave_0"), val = bool(false)]; tensor x_379_cast_fp16 = concat(axis = var_6009, interleave = x_379_interleave_0, values = (x_377_cast_fp16, var_6012_cast_fp16))[name = string("x_379_cast_fp16")]; tensor out_253_axes_0 = const()[name = string("out_253_axes_0"), val = tensor([1])]; fp16 var_6022_to_fp16 = const()[name = string("op_6022_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_253_cast_fp16 = layer_norm(axes = out_253_axes_0, epsilon = var_6022_to_fp16, x = x_379_cast_fp16)[name = string("out_253_cast_fp16")]; tensor layer_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636158080)))]; tensor out_255_cast_fp16 = mul(x = out_253_cast_fp16, y = layer_layers_21_input_layernorm_weight_to_fp16)[name = string("out_255_cast_fp16")]; tensor var_6028_split_sizes_0 = const()[name = string("op_6028_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6028_axis_0 = const()[name = string("op_6028_axis_0"), val = int32(1)]; tensor var_6028_cast_fp16_0, tensor var_6028_cast_fp16_1 = split(axis = var_6028_axis_0, split_sizes = var_6028_split_sizes_0, x = out_255_cast_fp16)[name = string("op_6028_cast_fp16")]; string query_states_85_pad_type_0 = const()[name = string("query_states_85_pad_type_0"), val = string("valid")]; tensor query_states_85_strides_0 = const()[name = string("query_states_85_strides_0"), val = tensor([1, 1])]; tensor query_states_85_pad_0 = const()[name = string("query_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_85_dilations_0 = const()[name = string("query_states_85_dilations_0"), val = tensor([1, 1])]; int32 query_states_85_groups_0 = const()[name = string("query_states_85_groups_0"), val = int32(1)]; tensor var_6050_to_fp16 = const()[name = string("op_6050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(636162240)))]; tensor query_states_85_cast_fp16 = conv(dilations = query_states_85_dilations_0, groups = query_states_85_groups_0, pad = query_states_85_pad_0, pad_type = query_states_85_pad_type_0, strides = query_states_85_strides_0, weight = var_6050_to_fp16, x = var_6028_cast_fp16_0)[name = string("query_states_85_cast_fp16")]; string key_states_85_pad_type_0 = const()[name = string("key_states_85_pad_type_0"), val = string("valid")]; tensor key_states_85_strides_0 = const()[name = string("key_states_85_strides_0"), val = tensor([1, 1])]; tensor key_states_85_pad_0 = const()[name = string("key_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_85_dilations_0 = const()[name = string("key_states_85_dilations_0"), val = tensor([1, 1])]; int32 key_states_85_groups_0 = const()[name = string("key_states_85_groups_0"), val = int32(1)]; tensor var_6061_to_fp16 = const()[name = string("op_6061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638259456)))]; tensor key_states_85_cast_fp16 = conv(dilations = key_states_85_dilations_0, groups = key_states_85_groups_0, pad = key_states_85_pad_0, pad_type = key_states_85_pad_type_0, strides = key_states_85_strides_0, weight = var_6061_to_fp16, x = var_6028_cast_fp16_0)[name = string("key_states_85_cast_fp16")]; string value_states_85_pad_type_0 = const()[name = string("value_states_85_pad_type_0"), val = string("valid")]; tensor value_states_85_strides_0 = const()[name = string("value_states_85_strides_0"), val = tensor([1, 1])]; tensor value_states_85_pad_0 = const()[name = string("value_states_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_85_dilations_0 = const()[name = string("value_states_85_dilations_0"), val = tensor([1, 1])]; int32 value_states_85_groups_0 = const()[name = string("value_states_85_groups_0"), val = int32(1)]; tensor var_6072_to_fp16 = const()[name = string("op_6072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638521664)))]; tensor value_states_85_cast_fp16 = conv(dilations = value_states_85_dilations_0, groups = value_states_85_groups_0, pad = value_states_85_pad_0, pad_type = value_states_85_pad_type_0, strides = value_states_85_strides_0, weight = var_6072_to_fp16, x = var_6028_cast_fp16_0)[name = string("value_states_85_cast_fp16")]; tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 16, 64, 8])]; tensor embed_85_cast_fp16 = reshape(shape = var_6080, x = query_states_85_cast_fp16)[name = string("embed_85_cast_fp16")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([1, 2, 64, 8])]; tensor var_6085_cast_fp16 = reshape(shape = var_6084, x = key_states_85_cast_fp16)[name = string("op_6085_cast_fp16")]; tensor embed_87_perm_0 = const()[name = string("embed_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6091 = const()[name = string("op_6091"), val = tensor([1, 2, 64, 8])]; tensor var_6092_cast_fp16 = reshape(shape = var_6091, x = value_states_85_cast_fp16)[name = string("op_6092_cast_fp16")]; tensor value_states_87_perm_0 = const()[name = string("value_states_87_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6096_cast_fp16 = mul(x = embed_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6097_split_sizes_0 = const()[name = string("op_6097_split_sizes_0"), val = tensor([32, 32])]; int32 var_6097_axis_0 = const()[name = string("op_6097_axis_0"), val = int32(-2)]; tensor var_6097_cast_fp16_0, tensor var_6097_cast_fp16_1 = split(axis = var_6097_axis_0, split_sizes = var_6097_split_sizes_0, x = embed_85_cast_fp16)[name = string("op_6097_cast_fp16")]; fp16 const_217_promoted_to_fp16 = const()[name = string("const_217_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6099_cast_fp16 = mul(x = var_6097_cast_fp16_1, y = const_217_promoted_to_fp16)[name = string("op_6099_cast_fp16")]; int32 var_6101 = const()[name = string("op_6101"), val = int32(-2)]; bool var_6102_interleave_0 = const()[name = string("op_6102_interleave_0"), val = bool(false)]; tensor var_6102_cast_fp16 = concat(axis = var_6101, interleave = var_6102_interleave_0, values = (var_6099_cast_fp16, var_6097_cast_fp16_0))[name = string("op_6102_cast_fp16")]; tensor var_6103_cast_fp16 = mul(x = var_6102_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6103_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_6096_cast_fp16, y = var_6103_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor embed_87_cast_fp16 = transpose(perm = embed_87_perm_0, x = var_6085_cast_fp16)[name = string("transpose_8")]; tensor var_6106_cast_fp16 = mul(x = embed_87_cast_fp16, y = cos_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6107_split_sizes_0 = const()[name = string("op_6107_split_sizes_0"), val = tensor([32, 32])]; int32 var_6107_axis_0 = const()[name = string("op_6107_axis_0"), val = int32(-1)]; tensor var_6107_cast_fp16_0, tensor var_6107_cast_fp16_1 = split(axis = var_6107_axis_0, split_sizes = var_6107_split_sizes_0, x = embed_87_cast_fp16)[name = string("op_6107_cast_fp16")]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6109_cast_fp16 = mul(x = var_6107_cast_fp16_1, y = const_218_promoted_to_fp16)[name = string("op_6109_cast_fp16")]; int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; bool var_6112_interleave_0 = const()[name = string("op_6112_interleave_0"), val = bool(false)]; tensor var_6112_cast_fp16 = concat(axis = var_6111, interleave = var_6112_interleave_0, values = (var_6109_cast_fp16, var_6107_cast_fp16_0))[name = string("op_6112_cast_fp16")]; tensor var_6113_cast_fp16 = mul(x = var_6112_cast_fp16, y = sin_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor key_states_87_cast_fp16 = add(x = var_6106_cast_fp16, y = var_6113_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([21])]; tensor expand_dims_212 = const()[name = string("expand_dims_212"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([22])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_211, expand_dims_212, position_id, concat_171_values3_0))[name = string("concat_171")]; tensor concat_172_values1_0 = const()[name = string("concat_172_values1_0"), val = tensor([0])]; tensor concat_172_values3_0 = const()[name = string("concat_172_values3_0"), val = tensor([0])]; int32 concat_172_axis_0 = const()[name = string("concat_172_axis_0"), val = int32(0)]; bool concat_172_interleave_0 = const()[name = string("concat_172_interleave_0"), val = bool(false)]; tensor concat_172 = concat(axis = concat_172_axis_0, interleave = concat_172_interleave_0, values = (expand_dims_214, concat_172_values1_0, var_426, concat_172_values3_0))[name = string("concat_172")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = key_states_87_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_256_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_256")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_87_cast_fp16 = transpose(perm = value_states_87_perm_0, x = var_6092_cast_fp16)[name = string("transpose_7")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_171, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_172, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = value_states_87_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_257_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_257")]; tensor var_6156_begin_0 = const()[name = string("op_6156_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6156_end_0 = const()[name = string("op_6156_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6156_end_mask_0 = const()[name = string("op_6156_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6156_cast_fp16 = slice_by_index(begin = var_6156_begin_0, end = var_6156_end_0, end_mask = var_6156_end_mask_0, x = coreml_update_state_90)[name = string("op_6156_cast_fp16")]; tensor tile_42 = const()[name = string("tile_42"), val = tensor([1, 1])]; int32 var_6159_axis_0 = const()[name = string("op_6159_axis_0"), val = int32(1)]; tensor var_6159_cast_fp16_0, tensor var_6159_cast_fp16_1 = split(axis = var_6159_axis_0, split_sizes = tile_42, x = var_6156_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor var_6166_begin_0 = const()[name = string("op_6166_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_6166_end_0 = const()[name = string("op_6166_end_0"), val = tensor([22, 2, 2048, 64])]; tensor var_6166_end_mask_0 = const()[name = string("op_6166_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6166_cast_fp16 = slice_by_index(begin = var_6166_begin_0, end = var_6166_end_0, end_mask = var_6166_end_mask_0, x = coreml_update_state_91)[name = string("op_6166_cast_fp16")]; tensor tile_43 = const()[name = string("tile_43"), val = tensor([1, 1])]; int32 var_6169_axis_0 = const()[name = string("op_6169_axis_0"), val = int32(1)]; tensor var_6169_cast_fp16_0, tensor var_6169_cast_fp16_1 = split(axis = var_6169_axis_0, split_sizes = tile_43, x = var_6166_cast_fp16)[name = string("op_6169_cast_fp16")]; tensor var_6172_split_sizes_0 = const()[name = string("op_6172_split_sizes_0"), val = tensor([8, 8])]; int32 var_6172_axis_0 = const()[name = string("op_6172_axis_0"), val = int32(1)]; tensor var_6172_cast_fp16_0, tensor var_6172_cast_fp16_1 = split(axis = var_6172_axis_0, split_sizes = var_6172_split_sizes_0, x = query_states_87_cast_fp16)[name = string("op_6172_cast_fp16")]; bool attn_weights_337_transpose_x_0 = const()[name = string("attn_weights_337_transpose_x_0"), val = bool(false)]; bool attn_weights_337_transpose_y_0 = const()[name = string("attn_weights_337_transpose_y_0"), val = bool(false)]; tensor attn_weights_337_cast_fp16 = matmul(transpose_x = attn_weights_337_transpose_x_0, transpose_y = attn_weights_337_transpose_y_0, x = var_6159_cast_fp16_0, y = var_6172_cast_fp16_0)[name = string("attn_weights_337_cast_fp16")]; fp16 _inversed_attn_weights_339_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_339_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_339_cast_fp16 = mul(x = attn_weights_337_cast_fp16, y = _inversed_attn_weights_339_y_0_to_fp16)[name = string("_inversed_attn_weights_339_cast_fp16")]; tensor attn_weights_341_cast_fp16 = add(x = _inversed_attn_weights_339_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_341_cast_fp16")]; int32 var_6179 = const()[name = string("op_6179"), val = int32(2)]; tensor attn_weights_343_cast_fp16 = softmax(axis = var_6179, x = attn_weights_341_cast_fp16)[name = string("attn_weights_343_cast_fp16")]; bool var_6185_transpose_x_1 = const()[name = string("op_6185_transpose_x_1"), val = bool(true)]; bool var_6185_transpose_y_1 = const()[name = string("op_6185_transpose_y_1"), val = bool(false)]; tensor var_6185_cast_fp16 = matmul(transpose_x = var_6185_transpose_x_1, transpose_y = var_6185_transpose_y_1, x = attn_weights_343_cast_fp16, y = var_6169_cast_fp16_0)[name = string("op_6185_cast_fp16")]; bool attn_weights_345_transpose_x_0 = const()[name = string("attn_weights_345_transpose_x_0"), val = bool(false)]; bool attn_weights_345_transpose_y_0 = const()[name = string("attn_weights_345_transpose_y_0"), val = bool(false)]; tensor attn_weights_345_cast_fp16 = matmul(transpose_x = attn_weights_345_transpose_x_0, transpose_y = attn_weights_345_transpose_y_0, x = var_6159_cast_fp16_1, y = var_6172_cast_fp16_1)[name = string("attn_weights_345_cast_fp16")]; fp16 _inversed_attn_weights_347_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_347_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_347_cast_fp16 = mul(x = attn_weights_345_cast_fp16, y = _inversed_attn_weights_347_y_0_to_fp16)[name = string("_inversed_attn_weights_347_cast_fp16")]; tensor attn_weights_349_cast_fp16 = add(x = _inversed_attn_weights_347_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_349_cast_fp16")]; int32 var_6191 = const()[name = string("op_6191"), val = int32(2)]; tensor attn_weights_351_cast_fp16 = softmax(axis = var_6191, x = attn_weights_349_cast_fp16)[name = string("attn_weights_351_cast_fp16")]; bool attn_output_127_transpose_x_1 = const()[name = string("attn_output_127_transpose_x_1"), val = bool(true)]; bool attn_output_127_transpose_y_1 = const()[name = string("attn_output_127_transpose_y_1"), val = bool(false)]; tensor attn_output_127_cast_fp16 = matmul(transpose_x = attn_output_127_transpose_x_1, transpose_y = attn_output_127_transpose_y_1, x = attn_weights_351_cast_fp16, y = var_6169_cast_fp16_1)[name = string("attn_output_127_cast_fp16")]; int32 var_6199 = const()[name = string("op_6199"), val = int32(1)]; bool attn_output_129_interleave_0 = const()[name = string("attn_output_129_interleave_0"), val = bool(false)]; tensor attn_output_129_cast_fp16 = concat(axis = var_6199, interleave = attn_output_129_interleave_0, values = (var_6185_cast_fp16, attn_output_127_cast_fp16))[name = string("attn_output_129_cast_fp16")]; tensor var_6203_perm_0 = const()[name = string("op_6203_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6208 = const()[name = string("op_6208"), val = tensor([1, 1024, 1, 8])]; tensor var_6203_cast_fp16 = transpose(perm = var_6203_perm_0, x = attn_output_129_cast_fp16)[name = string("transpose_6")]; tensor x_383_cast_fp16 = reshape(shape = var_6208, x = var_6203_cast_fp16)[name = string("x_383_cast_fp16")]; string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; tensor var_6215_to_fp16 = const()[name = string("op_6215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638783872)))]; tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = var_6215_to_fp16, x = x_383_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("x_385_cast_fp16")]; int32 var_6227 = const()[name = string("op_6227"), val = int32(1)]; fp16 const_223_promoted_to_fp16 = const()[name = string("const_223_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6230_cast_fp16 = mul(x = x_385_cast_fp16, y = const_223_promoted_to_fp16)[name = string("op_6230_cast_fp16")]; bool x_387_interleave_0 = const()[name = string("x_387_interleave_0"), val = bool(false)]; tensor x_387_cast_fp16 = concat(axis = var_6227, interleave = x_387_interleave_0, values = (x_385_cast_fp16, var_6230_cast_fp16))[name = string("x_387_cast_fp16")]; tensor out_259_axes_0 = const()[name = string("out_259_axes_0"), val = tensor([1])]; fp16 var_6240_to_fp16 = const()[name = string("op_6240_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_259_cast_fp16 = layer_norm(axes = out_259_axes_0, epsilon = var_6240_to_fp16, x = x_387_cast_fp16)[name = string("out_259_cast_fp16")]; tensor layer_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640881088)))]; tensor out_261_cast_fp16 = mul(x = out_259_cast_fp16, y = layer_layers_21_post_attention_layernorm_weight_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_6246_split_sizes_0 = const()[name = string("op_6246_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6246_axis_0 = const()[name = string("op_6246_axis_0"), val = int32(1)]; tensor var_6246_cast_fp16_0, tensor var_6246_cast_fp16_1 = split(axis = var_6246_axis_0, split_sizes = var_6246_split_sizes_0, x = out_261_cast_fp16)[name = string("op_6246_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor var_6251_to_fp16 = const()[name = string("op_6251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640885248)))]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = var_6251_to_fp16, x = var_6246_cast_fp16_0)[name = string("input_43_cast_fp16")]; tensor var_6262_cast_fp16 = silu(x = input_43_cast_fp16)[name = string("op_6262_cast_fp16")]; string var_6267_pad_type_0 = const()[name = string("op_6267_pad_type_0"), val = string("valid")]; tensor var_6267_strides_0 = const()[name = string("op_6267_strides_0"), val = tensor([1, 1])]; tensor var_6267_pad_0 = const()[name = string("op_6267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6267_dilations_0 = const()[name = string("op_6267_dilations_0"), val = tensor([1, 1])]; int32 var_6267_groups_0 = const()[name = string("op_6267_groups_0"), val = int32(1)]; tensor var_6250_to_fp16 = const()[name = string("op_6250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649273920)))]; tensor var_6267_cast_fp16 = conv(dilations = var_6267_dilations_0, groups = var_6267_groups_0, pad = var_6267_pad_0, pad_type = var_6267_pad_type_0, strides = var_6267_strides_0, weight = var_6250_to_fp16, x = var_6246_cast_fp16_0)[name = string("op_6267_cast_fp16")]; tensor x_393_cast_fp16 = mul(x = var_6262_cast_fp16, y = var_6267_cast_fp16)[name = string("x_393_cast_fp16")]; string hidden_states_131_pad_type_0 = const()[name = string("hidden_states_131_pad_type_0"), val = string("valid")]; tensor hidden_states_131_strides_0 = const()[name = string("hidden_states_131_strides_0"), val = tensor([1, 1])]; tensor hidden_states_131_pad_0 = const()[name = string("hidden_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_131_dilations_0 = const()[name = string("hidden_states_131_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_131_groups_0 = const()[name = string("hidden_states_131_groups_0"), val = int32(1)]; tensor var_6249_to_fp16 = const()[name = string("op_6249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657662592)))]; tensor hidden_states_131_cast_fp16 = conv(dilations = hidden_states_131_dilations_0, groups = hidden_states_131_groups_0, pad = hidden_states_131_pad_0, pad_type = hidden_states_131_pad_type_0, strides = hidden_states_131_strides_0, weight = var_6249_to_fp16, x = x_393_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor x_395_cast_fp16 = add(x = x_385_cast_fp16, y = hidden_states_131_cast_fp16)[name = string("x_395_cast_fp16")]; int32 var_6280 = const()[name = string("op_6280"), val = int32(1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6283_cast_fp16 = mul(x = x_395_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_6283_cast_fp16")]; bool x_397_interleave_0 = const()[name = string("x_397_interleave_0"), val = bool(false)]; tensor x_397_cast_fp16 = concat(axis = var_6280, interleave = x_397_interleave_0, values = (x_395_cast_fp16, var_6283_cast_fp16))[name = string("x_397_cast_fp16")]; tensor out_265_axes_0 = const()[name = string("out_265_axes_0"), val = tensor([1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_265_cast_fp16 = layer_norm(axes = out_265_axes_0, epsilon = var_6293_to_fp16, x = x_397_cast_fp16)[name = string("out_265_cast_fp16")]; tensor layer_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666051264)))]; tensor out_267_cast_fp16 = mul(x = out_265_cast_fp16, y = layer_layers_22_input_layernorm_weight_to_fp16)[name = string("out_267_cast_fp16")]; tensor var_6299_split_sizes_0 = const()[name = string("op_6299_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6299_axis_0 = const()[name = string("op_6299_axis_0"), val = int32(1)]; tensor var_6299_cast_fp16_0, tensor var_6299_cast_fp16_1 = split(axis = var_6299_axis_0, split_sizes = var_6299_split_sizes_0, x = out_267_cast_fp16)[name = string("op_6299_cast_fp16")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor var_6321_to_fp16 = const()[name = string("op_6321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666055424)))]; tensor query_states_89_cast_fp16 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = var_6321_to_fp16, x = var_6299_cast_fp16_0)[name = string("query_states_89_cast_fp16")]; string key_states_89_pad_type_0 = const()[name = string("key_states_89_pad_type_0"), val = string("valid")]; tensor key_states_89_strides_0 = const()[name = string("key_states_89_strides_0"), val = tensor([1, 1])]; tensor key_states_89_pad_0 = const()[name = string("key_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_89_dilations_0 = const()[name = string("key_states_89_dilations_0"), val = tensor([1, 1])]; int32 key_states_89_groups_0 = const()[name = string("key_states_89_groups_0"), val = int32(1)]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668152640)))]; tensor key_states_89_cast_fp16 = conv(dilations = key_states_89_dilations_0, groups = key_states_89_groups_0, pad = key_states_89_pad_0, pad_type = key_states_89_pad_type_0, strides = key_states_89_strides_0, weight = var_6332_to_fp16, x = var_6299_cast_fp16_0)[name = string("key_states_89_cast_fp16")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668414848)))]; tensor value_states_89_cast_fp16 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = var_6343_to_fp16, x = var_6299_cast_fp16_0)[name = string("value_states_89_cast_fp16")]; tensor var_6351 = const()[name = string("op_6351"), val = tensor([1, 16, 64, 8])]; tensor embed_89_cast_fp16 = reshape(shape = var_6351, x = query_states_89_cast_fp16)[name = string("embed_89_cast_fp16")]; tensor var_6355 = const()[name = string("op_6355"), val = tensor([1, 2, 64, 8])]; tensor var_6356_cast_fp16 = reshape(shape = var_6355, x = key_states_89_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor embed_91_perm_0 = const()[name = string("embed_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6362 = const()[name = string("op_6362"), val = tensor([1, 2, 64, 8])]; tensor var_6363_cast_fp16 = reshape(shape = var_6362, x = value_states_89_cast_fp16)[name = string("op_6363_cast_fp16")]; tensor value_states_91_perm_0 = const()[name = string("value_states_91_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6367_cast_fp16 = mul(x = embed_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor var_6368_split_sizes_0 = const()[name = string("op_6368_split_sizes_0"), val = tensor([32, 32])]; int32 var_6368_axis_0 = const()[name = string("op_6368_axis_0"), val = int32(-2)]; tensor var_6368_cast_fp16_0, tensor var_6368_cast_fp16_1 = split(axis = var_6368_axis_0, split_sizes = var_6368_split_sizes_0, x = embed_89_cast_fp16)[name = string("op_6368_cast_fp16")]; fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6370_cast_fp16 = mul(x = var_6368_cast_fp16_1, y = const_227_promoted_to_fp16)[name = string("op_6370_cast_fp16")]; int32 var_6372 = const()[name = string("op_6372"), val = int32(-2)]; bool var_6373_interleave_0 = const()[name = string("op_6373_interleave_0"), val = bool(false)]; tensor var_6373_cast_fp16 = concat(axis = var_6372, interleave = var_6373_interleave_0, values = (var_6370_cast_fp16, var_6368_cast_fp16_0))[name = string("op_6373_cast_fp16")]; tensor var_6374_cast_fp16 = mul(x = var_6373_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6374_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_6367_cast_fp16, y = var_6374_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor embed_91_cast_fp16 = transpose(perm = embed_91_perm_0, x = var_6356_cast_fp16)[name = string("transpose_5")]; tensor var_6377_cast_fp16 = mul(x = embed_91_cast_fp16, y = cos_cast_fp16)[name = string("op_6377_cast_fp16")]; tensor var_6378_split_sizes_0 = const()[name = string("op_6378_split_sizes_0"), val = tensor([32, 32])]; int32 var_6378_axis_0 = const()[name = string("op_6378_axis_0"), val = int32(-1)]; tensor var_6378_cast_fp16_0, tensor var_6378_cast_fp16_1 = split(axis = var_6378_axis_0, split_sizes = var_6378_split_sizes_0, x = embed_91_cast_fp16)[name = string("op_6378_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6380_cast_fp16 = mul(x = var_6378_cast_fp16_1, y = const_228_promoted_to_fp16)[name = string("op_6380_cast_fp16")]; int32 var_6382 = const()[name = string("op_6382"), val = int32(-1)]; bool var_6383_interleave_0 = const()[name = string("op_6383_interleave_0"), val = bool(false)]; tensor var_6383_cast_fp16 = concat(axis = var_6382, interleave = var_6383_interleave_0, values = (var_6380_cast_fp16, var_6378_cast_fp16_0))[name = string("op_6383_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6383_cast_fp16, y = sin_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor key_states_91_cast_fp16 = add(x = var_6377_cast_fp16, y = var_6384_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor expand_dims_221 = const()[name = string("expand_dims_221"), val = tensor([22])]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([0])]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([23])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_221, expand_dims_222, position_id, concat_179_values3_0))[name = string("concat_179")]; tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (expand_dims_224, concat_180_values1_0, var_426, concat_180_values3_0))[name = string("concat_180")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = key_states_91_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_258_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_258")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91_cast_fp16 = transpose(perm = value_states_91_perm_0, x = var_6363_cast_fp16)[name = string("transpose_4")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_179, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_180, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = value_states_91_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_259_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_259")]; tensor var_6427_begin_0 = const()[name = string("op_6427_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6427_end_0 = const()[name = string("op_6427_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6427_end_mask_0 = const()[name = string("op_6427_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6427_cast_fp16 = slice_by_index(begin = var_6427_begin_0, end = var_6427_end_0, end_mask = var_6427_end_mask_0, x = coreml_update_state_92)[name = string("op_6427_cast_fp16")]; tensor tile_44 = const()[name = string("tile_44"), val = tensor([1, 1])]; int32 var_6430_axis_0 = const()[name = string("op_6430_axis_0"), val = int32(1)]; tensor var_6430_cast_fp16_0, tensor var_6430_cast_fp16_1 = split(axis = var_6430_axis_0, split_sizes = tile_44, x = var_6427_cast_fp16)[name = string("op_6430_cast_fp16")]; tensor var_6437_begin_0 = const()[name = string("op_6437_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_6437_end_0 = const()[name = string("op_6437_end_0"), val = tensor([23, 2, 2048, 64])]; tensor var_6437_end_mask_0 = const()[name = string("op_6437_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = coreml_update_state_93)[name = string("op_6437_cast_fp16")]; tensor tile_45 = const()[name = string("tile_45"), val = tensor([1, 1])]; int32 var_6440_axis_0 = const()[name = string("op_6440_axis_0"), val = int32(1)]; tensor var_6440_cast_fp16_0, tensor var_6440_cast_fp16_1 = split(axis = var_6440_axis_0, split_sizes = tile_45, x = var_6437_cast_fp16)[name = string("op_6440_cast_fp16")]; tensor var_6443_split_sizes_0 = const()[name = string("op_6443_split_sizes_0"), val = tensor([8, 8])]; int32 var_6443_axis_0 = const()[name = string("op_6443_axis_0"), val = int32(1)]; tensor var_6443_cast_fp16_0, tensor var_6443_cast_fp16_1 = split(axis = var_6443_axis_0, split_sizes = var_6443_split_sizes_0, x = query_states_91_cast_fp16)[name = string("op_6443_cast_fp16")]; bool attn_weights_353_transpose_x_0 = const()[name = string("attn_weights_353_transpose_x_0"), val = bool(false)]; bool attn_weights_353_transpose_y_0 = const()[name = string("attn_weights_353_transpose_y_0"), val = bool(false)]; tensor attn_weights_353_cast_fp16 = matmul(transpose_x = attn_weights_353_transpose_x_0, transpose_y = attn_weights_353_transpose_y_0, x = var_6430_cast_fp16_0, y = var_6443_cast_fp16_0)[name = string("attn_weights_353_cast_fp16")]; fp16 _inversed_attn_weights_355_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_355_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_355_cast_fp16 = mul(x = attn_weights_353_cast_fp16, y = _inversed_attn_weights_355_y_0_to_fp16)[name = string("_inversed_attn_weights_355_cast_fp16")]; tensor attn_weights_357_cast_fp16 = add(x = _inversed_attn_weights_355_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_357_cast_fp16")]; int32 var_6450 = const()[name = string("op_6450"), val = int32(2)]; tensor attn_weights_359_cast_fp16 = softmax(axis = var_6450, x = attn_weights_357_cast_fp16)[name = string("attn_weights_359_cast_fp16")]; bool var_6456_transpose_x_1 = const()[name = string("op_6456_transpose_x_1"), val = bool(true)]; bool var_6456_transpose_y_1 = const()[name = string("op_6456_transpose_y_1"), val = bool(false)]; tensor var_6456_cast_fp16 = matmul(transpose_x = var_6456_transpose_x_1, transpose_y = var_6456_transpose_y_1, x = attn_weights_359_cast_fp16, y = var_6440_cast_fp16_0)[name = string("op_6456_cast_fp16")]; bool attn_weights_361_transpose_x_0 = const()[name = string("attn_weights_361_transpose_x_0"), val = bool(false)]; bool attn_weights_361_transpose_y_0 = const()[name = string("attn_weights_361_transpose_y_0"), val = bool(false)]; tensor attn_weights_361_cast_fp16 = matmul(transpose_x = attn_weights_361_transpose_x_0, transpose_y = attn_weights_361_transpose_y_0, x = var_6430_cast_fp16_1, y = var_6443_cast_fp16_1)[name = string("attn_weights_361_cast_fp16")]; fp16 _inversed_attn_weights_363_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_363_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_363_cast_fp16 = mul(x = attn_weights_361_cast_fp16, y = _inversed_attn_weights_363_y_0_to_fp16)[name = string("_inversed_attn_weights_363_cast_fp16")]; tensor attn_weights_365_cast_fp16 = add(x = _inversed_attn_weights_363_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_365_cast_fp16")]; int32 var_6462 = const()[name = string("op_6462"), val = int32(2)]; tensor attn_weights_367_cast_fp16 = softmax(axis = var_6462, x = attn_weights_365_cast_fp16)[name = string("attn_weights_367_cast_fp16")]; bool attn_output_133_transpose_x_1 = const()[name = string("attn_output_133_transpose_x_1"), val = bool(true)]; bool attn_output_133_transpose_y_1 = const()[name = string("attn_output_133_transpose_y_1"), val = bool(false)]; tensor attn_output_133_cast_fp16 = matmul(transpose_x = attn_output_133_transpose_x_1, transpose_y = attn_output_133_transpose_y_1, x = attn_weights_367_cast_fp16, y = var_6440_cast_fp16_1)[name = string("attn_output_133_cast_fp16")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(1)]; bool attn_output_135_interleave_0 = const()[name = string("attn_output_135_interleave_0"), val = bool(false)]; tensor attn_output_135_cast_fp16 = concat(axis = var_6470, interleave = attn_output_135_interleave_0, values = (var_6456_cast_fp16, attn_output_133_cast_fp16))[name = string("attn_output_135_cast_fp16")]; tensor var_6474_perm_0 = const()[name = string("op_6474_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6479 = const()[name = string("op_6479"), val = tensor([1, 1024, 1, 8])]; tensor var_6474_cast_fp16 = transpose(perm = var_6474_perm_0, x = attn_output_135_cast_fp16)[name = string("transpose_3")]; tensor x_401_cast_fp16 = reshape(shape = var_6479, x = var_6474_cast_fp16)[name = string("x_401_cast_fp16")]; string hidden_states_135_pad_type_0 = const()[name = string("hidden_states_135_pad_type_0"), val = string("valid")]; tensor hidden_states_135_strides_0 = const()[name = string("hidden_states_135_strides_0"), val = tensor([1, 1])]; tensor hidden_states_135_pad_0 = const()[name = string("hidden_states_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_135_dilations_0 = const()[name = string("hidden_states_135_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_135_groups_0 = const()[name = string("hidden_states_135_groups_0"), val = int32(1)]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(668677056)))]; tensor hidden_states_135_cast_fp16 = conv(dilations = hidden_states_135_dilations_0, groups = hidden_states_135_groups_0, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = hidden_states_135_strides_0, weight = var_6486_to_fp16, x = x_401_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor x_403_cast_fp16 = add(x = x_395_cast_fp16, y = hidden_states_135_cast_fp16)[name = string("x_403_cast_fp16")]; int32 var_6498 = const()[name = string("op_6498"), val = int32(1)]; fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6501_cast_fp16 = mul(x = x_403_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_6501_cast_fp16")]; bool x_405_interleave_0 = const()[name = string("x_405_interleave_0"), val = bool(false)]; tensor x_405_cast_fp16 = concat(axis = var_6498, interleave = x_405_interleave_0, values = (x_403_cast_fp16, var_6501_cast_fp16))[name = string("x_405_cast_fp16")]; tensor out_271_axes_0 = const()[name = string("out_271_axes_0"), val = tensor([1])]; fp16 var_6511_to_fp16 = const()[name = string("op_6511_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_271_cast_fp16 = layer_norm(axes = out_271_axes_0, epsilon = var_6511_to_fp16, x = x_405_cast_fp16)[name = string("out_271_cast_fp16")]; tensor layer_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670774272)))]; tensor out_273_cast_fp16 = mul(x = out_271_cast_fp16, y = layer_layers_22_post_attention_layernorm_weight_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_6517_split_sizes_0 = const()[name = string("op_6517_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6517_axis_0 = const()[name = string("op_6517_axis_0"), val = int32(1)]; tensor var_6517_cast_fp16_0, tensor var_6517_cast_fp16_1 = split(axis = var_6517_axis_0, split_sizes = var_6517_split_sizes_0, x = out_273_cast_fp16)[name = string("op_6517_cast_fp16")]; string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670778432)))]; tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = var_6522_to_fp16, x = var_6517_cast_fp16_0)[name = string("input_45_cast_fp16")]; tensor var_6533_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_6533_cast_fp16")]; string var_6538_pad_type_0 = const()[name = string("op_6538_pad_type_0"), val = string("valid")]; tensor var_6538_strides_0 = const()[name = string("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = string("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = string("op_6538_dilations_0"), val = tensor([1, 1])]; int32 var_6538_groups_0 = const()[name = string("op_6538_groups_0"), val = int32(1)]; tensor var_6521_to_fp16 = const()[name = string("op_6521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679167104)))]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = var_6521_to_fp16, x = var_6517_cast_fp16_0)[name = string("op_6538_cast_fp16")]; tensor x_411_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6538_cast_fp16)[name = string("x_411_cast_fp16")]; string hidden_states_137_pad_type_0 = const()[name = string("hidden_states_137_pad_type_0"), val = string("valid")]; tensor hidden_states_137_strides_0 = const()[name = string("hidden_states_137_strides_0"), val = tensor([1, 1])]; tensor hidden_states_137_pad_0 = const()[name = string("hidden_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_137_dilations_0 = const()[name = string("hidden_states_137_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_137_groups_0 = const()[name = string("hidden_states_137_groups_0"), val = int32(1)]; tensor var_6520_to_fp16 = const()[name = string("op_6520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687555776)))]; tensor hidden_states_137_cast_fp16 = conv(dilations = hidden_states_137_dilations_0, groups = hidden_states_137_groups_0, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = hidden_states_137_strides_0, weight = var_6520_to_fp16, x = x_411_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor x_413_cast_fp16 = add(x = x_403_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("x_413_cast_fp16")]; int32 var_6551 = const()[name = string("op_6551"), val = int32(1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6554_cast_fp16 = mul(x = x_413_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_6554_cast_fp16")]; bool x_415_interleave_0 = const()[name = string("x_415_interleave_0"), val = bool(false)]; tensor x_415_cast_fp16 = concat(axis = var_6551, interleave = x_415_interleave_0, values = (x_413_cast_fp16, var_6554_cast_fp16))[name = string("x_415_cast_fp16")]; tensor out_277_axes_0 = const()[name = string("out_277_axes_0"), val = tensor([1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_277_cast_fp16 = layer_norm(axes = out_277_axes_0, epsilon = var_6564_to_fp16, x = x_415_cast_fp16)[name = string("out_277_cast_fp16")]; tensor layer_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695944448)))]; tensor out_279_cast_fp16 = mul(x = out_277_cast_fp16, y = layer_layers_23_input_layernorm_weight_to_fp16)[name = string("out_279_cast_fp16")]; tensor var_6570_split_sizes_0 = const()[name = string("op_6570_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6570_axis_0 = const()[name = string("op_6570_axis_0"), val = int32(1)]; tensor var_6570_cast_fp16_0, tensor var_6570_cast_fp16_1 = split(axis = var_6570_axis_0, split_sizes = var_6570_split_sizes_0, x = out_279_cast_fp16)[name = string("op_6570_cast_fp16")]; string query_states_93_pad_type_0 = const()[name = string("query_states_93_pad_type_0"), val = string("valid")]; tensor query_states_93_strides_0 = const()[name = string("query_states_93_strides_0"), val = tensor([1, 1])]; tensor query_states_93_pad_0 = const()[name = string("query_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_93_dilations_0 = const()[name = string("query_states_93_dilations_0"), val = tensor([1, 1])]; int32 query_states_93_groups_0 = const()[name = string("query_states_93_groups_0"), val = int32(1)]; tensor var_6592_to_fp16 = const()[name = string("op_6592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695948608)))]; tensor query_states_93_cast_fp16 = conv(dilations = query_states_93_dilations_0, groups = query_states_93_groups_0, pad = query_states_93_pad_0, pad_type = query_states_93_pad_type_0, strides = query_states_93_strides_0, weight = var_6592_to_fp16, x = var_6570_cast_fp16_0)[name = string("query_states_93_cast_fp16")]; string key_states_93_pad_type_0 = const()[name = string("key_states_93_pad_type_0"), val = string("valid")]; tensor key_states_93_strides_0 = const()[name = string("key_states_93_strides_0"), val = tensor([1, 1])]; tensor key_states_93_pad_0 = const()[name = string("key_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_93_dilations_0 = const()[name = string("key_states_93_dilations_0"), val = tensor([1, 1])]; int32 key_states_93_groups_0 = const()[name = string("key_states_93_groups_0"), val = int32(1)]; tensor var_6603_to_fp16 = const()[name = string("op_6603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698045824)))]; tensor key_states_93_cast_fp16 = conv(dilations = key_states_93_dilations_0, groups = key_states_93_groups_0, pad = key_states_93_pad_0, pad_type = key_states_93_pad_type_0, strides = key_states_93_strides_0, weight = var_6603_to_fp16, x = var_6570_cast_fp16_0)[name = string("key_states_93_cast_fp16")]; string value_states_93_pad_type_0 = const()[name = string("value_states_93_pad_type_0"), val = string("valid")]; tensor value_states_93_strides_0 = const()[name = string("value_states_93_strides_0"), val = tensor([1, 1])]; tensor value_states_93_pad_0 = const()[name = string("value_states_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_93_dilations_0 = const()[name = string("value_states_93_dilations_0"), val = tensor([1, 1])]; int32 value_states_93_groups_0 = const()[name = string("value_states_93_groups_0"), val = int32(1)]; tensor var_6614_to_fp16 = const()[name = string("op_6614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698308032)))]; tensor value_states_93_cast_fp16 = conv(dilations = value_states_93_dilations_0, groups = value_states_93_groups_0, pad = value_states_93_pad_0, pad_type = value_states_93_pad_type_0, strides = value_states_93_strides_0, weight = var_6614_to_fp16, x = var_6570_cast_fp16_0)[name = string("value_states_93_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([1, 16, 64, 8])]; tensor embed_93_cast_fp16 = reshape(shape = var_6622, x = query_states_93_cast_fp16)[name = string("embed_93_cast_fp16")]; tensor var_6626 = const()[name = string("op_6626"), val = tensor([1, 2, 64, 8])]; tensor var_6627_cast_fp16 = reshape(shape = var_6626, x = key_states_93_cast_fp16)[name = string("op_6627_cast_fp16")]; tensor embed_perm_0 = const()[name = string("embed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6633 = const()[name = string("op_6633"), val = tensor([1, 2, 64, 8])]; tensor var_6634_cast_fp16 = reshape(shape = var_6633, x = value_states_93_cast_fp16)[name = string("op_6634_cast_fp16")]; tensor value_states_perm_0 = const()[name = string("value_states_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6638_cast_fp16 = mul(x = embed_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6638_cast_fp16")]; tensor var_6639_split_sizes_0 = const()[name = string("op_6639_split_sizes_0"), val = tensor([32, 32])]; int32 var_6639_axis_0 = const()[name = string("op_6639_axis_0"), val = int32(-2)]; tensor var_6639_cast_fp16_0, tensor var_6639_cast_fp16_1 = split(axis = var_6639_axis_0, split_sizes = var_6639_split_sizes_0, x = embed_93_cast_fp16)[name = string("op_6639_cast_fp16")]; fp16 const_237_promoted_to_fp16 = const()[name = string("const_237_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6641_cast_fp16 = mul(x = var_6639_cast_fp16_1, y = const_237_promoted_to_fp16)[name = string("op_6641_cast_fp16")]; int32 var_6643 = const()[name = string("op_6643"), val = int32(-2)]; bool var_6644_interleave_0 = const()[name = string("op_6644_interleave_0"), val = bool(false)]; tensor var_6644_cast_fp16 = concat(axis = var_6643, interleave = var_6644_interleave_0, values = (var_6641_cast_fp16, var_6639_cast_fp16_0))[name = string("op_6644_cast_fp16")]; tensor var_6645_cast_fp16 = mul(x = var_6644_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6645_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_6638_cast_fp16, y = var_6645_cast_fp16)[name = string("query_states_cast_fp16")]; tensor embed_cast_fp16 = transpose(perm = embed_perm_0, x = var_6627_cast_fp16)[name = string("transpose_2")]; tensor var_6648_cast_fp16 = mul(x = embed_cast_fp16, y = cos_cast_fp16)[name = string("op_6648_cast_fp16")]; tensor var_6649_split_sizes_0 = const()[name = string("op_6649_split_sizes_0"), val = tensor([32, 32])]; int32 var_6649_axis_0 = const()[name = string("op_6649_axis_0"), val = int32(-1)]; tensor var_6649_cast_fp16_0, tensor var_6649_cast_fp16_1 = split(axis = var_6649_axis_0, split_sizes = var_6649_split_sizes_0, x = embed_cast_fp16)[name = string("op_6649_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6651_cast_fp16 = mul(x = var_6649_cast_fp16_1, y = const_238_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; int32 var_6653 = const()[name = string("op_6653"), val = int32(-1)]; bool var_6654_interleave_0 = const()[name = string("op_6654_interleave_0"), val = bool(false)]; tensor var_6654_cast_fp16 = concat(axis = var_6653, interleave = var_6654_interleave_0, values = (var_6651_cast_fp16, var_6649_cast_fp16_0))[name = string("op_6654_cast_fp16")]; tensor var_6655_cast_fp16 = mul(x = var_6654_cast_fp16, y = sin_cast_fp16)[name = string("op_6655_cast_fp16")]; tensor key_states_cast_fp16 = add(x = var_6648_cast_fp16, y = var_6655_cast_fp16)[name = string("key_states_cast_fp16")]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([23])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([0])]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([24])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_231, expand_dims_232, position_id, concat_187_values3_0))[name = string("concat_187")]; tensor concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = tensor([0])]; tensor concat_188_values3_0 = const()[name = string("concat_188_values3_0"), val = tensor([0])]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (expand_dims_234, concat_188_values1_0, var_426, concat_188_values3_0))[name = string("concat_188")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = key_states_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_260_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_260")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, true])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_cast_fp16 = transpose(perm = value_states_perm_0, x = var_6634_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_187, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_188, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = value_states_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_261_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_261")]; tensor var_6698_begin_0 = const()[name = string("op_6698_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6698_end_0 = const()[name = string("op_6698_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6698_end_mask_0 = const()[name = string("op_6698_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6698_cast_fp16 = slice_by_index(begin = var_6698_begin_0, end = var_6698_end_0, end_mask = var_6698_end_mask_0, x = coreml_update_state_94)[name = string("op_6698_cast_fp16")]; tensor tile_46 = const()[name = string("tile_46"), val = tensor([1, 1])]; int32 var_6701_axis_0 = const()[name = string("op_6701_axis_0"), val = int32(1)]; tensor var_6701_cast_fp16_0, tensor var_6701_cast_fp16_1 = split(axis = var_6701_axis_0, split_sizes = tile_46, x = var_6698_cast_fp16)[name = string("op_6701_cast_fp16")]; tensor var_6708_begin_0 = const()[name = string("op_6708_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_6708_end_0 = const()[name = string("op_6708_end_0"), val = tensor([1, 2, 2048, 64])]; tensor var_6708_end_mask_0 = const()[name = string("op_6708_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6708_cast_fp16 = slice_by_index(begin = var_6708_begin_0, end = var_6708_end_0, end_mask = var_6708_end_mask_0, x = coreml_update_state_95)[name = string("op_6708_cast_fp16")]; tensor tile_47 = const()[name = string("tile_47"), val = tensor([1, 1])]; int32 var_6711_axis_0 = const()[name = string("op_6711_axis_0"), val = int32(1)]; tensor var_6711_cast_fp16_0, tensor var_6711_cast_fp16_1 = split(axis = var_6711_axis_0, split_sizes = tile_47, x = var_6708_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor var_6714_split_sizes_0 = const()[name = string("op_6714_split_sizes_0"), val = tensor([8, 8])]; int32 var_6714_axis_0 = const()[name = string("op_6714_axis_0"), val = int32(1)]; tensor var_6714_cast_fp16_0, tensor var_6714_cast_fp16_1 = split(axis = var_6714_axis_0, split_sizes = var_6714_split_sizes_0, x = query_states_cast_fp16)[name = string("op_6714_cast_fp16")]; bool attn_weights_369_transpose_x_0 = const()[name = string("attn_weights_369_transpose_x_0"), val = bool(false)]; bool attn_weights_369_transpose_y_0 = const()[name = string("attn_weights_369_transpose_y_0"), val = bool(false)]; tensor attn_weights_369_cast_fp16 = matmul(transpose_x = attn_weights_369_transpose_x_0, transpose_y = attn_weights_369_transpose_y_0, x = var_6701_cast_fp16_0, y = var_6714_cast_fp16_0)[name = string("attn_weights_369_cast_fp16")]; fp16 _inversed_attn_weights_371_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_371_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_371_cast_fp16 = mul(x = attn_weights_369_cast_fp16, y = _inversed_attn_weights_371_y_0_to_fp16)[name = string("_inversed_attn_weights_371_cast_fp16")]; tensor attn_weights_373_cast_fp16 = add(x = _inversed_attn_weights_371_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_373_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(2)]; tensor attn_weights_375_cast_fp16 = softmax(axis = var_6721, x = attn_weights_373_cast_fp16)[name = string("attn_weights_375_cast_fp16")]; bool var_6727_transpose_x_1 = const()[name = string("op_6727_transpose_x_1"), val = bool(true)]; bool var_6727_transpose_y_1 = const()[name = string("op_6727_transpose_y_1"), val = bool(false)]; tensor var_6727_cast_fp16 = matmul(transpose_x = var_6727_transpose_x_1, transpose_y = var_6727_transpose_y_1, x = attn_weights_375_cast_fp16, y = var_6711_cast_fp16_0)[name = string("op_6727_cast_fp16")]; bool attn_weights_377_transpose_x_0 = const()[name = string("attn_weights_377_transpose_x_0"), val = bool(false)]; bool attn_weights_377_transpose_y_0 = const()[name = string("attn_weights_377_transpose_y_0"), val = bool(false)]; tensor attn_weights_377_cast_fp16 = matmul(transpose_x = attn_weights_377_transpose_x_0, transpose_y = attn_weights_377_transpose_y_0, x = var_6701_cast_fp16_1, y = var_6714_cast_fp16_1)[name = string("attn_weights_377_cast_fp16")]; fp16 _inversed_attn_weights_379_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_379_y_0_to_fp16"), val = fp16(0x1p-3)]; tensor _inversed_attn_weights_379_cast_fp16 = mul(x = attn_weights_377_cast_fp16, y = _inversed_attn_weights_379_y_0_to_fp16)[name = string("_inversed_attn_weights_379_cast_fp16")]; tensor attn_weights_381_cast_fp16 = add(x = _inversed_attn_weights_379_cast_fp16, y = attn_mask_cast_fp16)[name = string("attn_weights_381_cast_fp16")]; int32 var_6733 = const()[name = string("op_6733"), val = int32(2)]; tensor attn_weights_cast_fp16 = softmax(axis = var_6733, x = attn_weights_381_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_139_transpose_x_1 = const()[name = string("attn_output_139_transpose_x_1"), val = bool(true)]; bool attn_output_139_transpose_y_1 = const()[name = string("attn_output_139_transpose_y_1"), val = bool(false)]; tensor attn_output_139_cast_fp16 = matmul(transpose_x = attn_output_139_transpose_x_1, transpose_y = attn_output_139_transpose_y_1, x = attn_weights_cast_fp16, y = var_6711_cast_fp16_1)[name = string("attn_output_139_cast_fp16")]; int32 var_6741 = const()[name = string("op_6741"), val = int32(1)]; bool attn_output_141_interleave_0 = const()[name = string("attn_output_141_interleave_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = concat(axis = var_6741, interleave = attn_output_141_interleave_0, values = (var_6727_cast_fp16, attn_output_139_cast_fp16))[name = string("attn_output_141_cast_fp16")]; tensor var_6745_perm_0 = const()[name = string("op_6745_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 1024, 1, 8])]; tensor var_6745_cast_fp16 = transpose(perm = var_6745_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_0")]; tensor x_419_cast_fp16 = reshape(shape = var_6750, x = var_6745_cast_fp16)[name = string("x_419_cast_fp16")]; string hidden_states_141_pad_type_0 = const()[name = string("hidden_states_141_pad_type_0"), val = string("valid")]; tensor hidden_states_141_strides_0 = const()[name = string("hidden_states_141_strides_0"), val = tensor([1, 1])]; tensor hidden_states_141_pad_0 = const()[name = string("hidden_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_141_dilations_0 = const()[name = string("hidden_states_141_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_141_groups_0 = const()[name = string("hidden_states_141_groups_0"), val = int32(1)]; tensor var_6757_to_fp16 = const()[name = string("op_6757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698570240)))]; tensor hidden_states_141_cast_fp16 = conv(dilations = hidden_states_141_dilations_0, groups = hidden_states_141_groups_0, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = hidden_states_141_strides_0, weight = var_6757_to_fp16, x = x_419_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor x_421_cast_fp16 = add(x = x_413_cast_fp16, y = hidden_states_141_cast_fp16)[name = string("x_421_cast_fp16")]; int32 var_6769 = const()[name = string("op_6769"), val = int32(1)]; fp16 const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6772_cast_fp16 = mul(x = x_421_cast_fp16, y = const_243_promoted_to_fp16)[name = string("op_6772_cast_fp16")]; bool x_423_interleave_0 = const()[name = string("x_423_interleave_0"), val = bool(false)]; tensor x_423_cast_fp16 = concat(axis = var_6769, interleave = x_423_interleave_0, values = (x_421_cast_fp16, var_6772_cast_fp16))[name = string("x_423_cast_fp16")]; tensor out_283_axes_0 = const()[name = string("out_283_axes_0"), val = tensor([1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_283_cast_fp16 = layer_norm(axes = out_283_axes_0, epsilon = var_6782_to_fp16, x = x_423_cast_fp16)[name = string("out_283_cast_fp16")]; tensor layer_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layer_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700667456)))]; tensor out_285_cast_fp16 = mul(x = out_283_cast_fp16, y = layer_layers_23_post_attention_layernorm_weight_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_6788_split_sizes_0 = const()[name = string("op_6788_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6788_axis_0 = const()[name = string("op_6788_axis_0"), val = int32(1)]; tensor var_6788_cast_fp16_0, tensor var_6788_cast_fp16_1 = split(axis = var_6788_axis_0, split_sizes = var_6788_split_sizes_0, x = out_285_cast_fp16)[name = string("op_6788_cast_fp16")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor var_6793_to_fp16 = const()[name = string("op_6793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700671616)))]; tensor input_cast_fp16 = conv(dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = var_6793_to_fp16, x = var_6788_cast_fp16_0)[name = string("input_cast_fp16")]; tensor var_6804_cast_fp16 = silu(x = input_cast_fp16)[name = string("op_6804_cast_fp16")]; string var_6809_pad_type_0 = const()[name = string("op_6809_pad_type_0"), val = string("valid")]; tensor var_6809_strides_0 = const()[name = string("op_6809_strides_0"), val = tensor([1, 1])]; tensor var_6809_pad_0 = const()[name = string("op_6809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6809_dilations_0 = const()[name = string("op_6809_dilations_0"), val = tensor([1, 1])]; int32 var_6809_groups_0 = const()[name = string("op_6809_groups_0"), val = int32(1)]; tensor var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709060288)))]; tensor var_6809_cast_fp16 = conv(dilations = var_6809_dilations_0, groups = var_6809_groups_0, pad = var_6809_pad_0, pad_type = var_6809_pad_type_0, strides = var_6809_strides_0, weight = var_6792_to_fp16, x = var_6788_cast_fp16_0)[name = string("op_6809_cast_fp16")]; tensor x_429_cast_fp16 = mul(x = var_6804_cast_fp16, y = var_6809_cast_fp16)[name = string("x_429_cast_fp16")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor var_6791_to_fp16 = const()[name = string("op_6791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717448960)))]; tensor hidden_states_cast_fp16 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = var_6791_to_fp16, x = x_429_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor x_431_cast_fp16 = add(x = x_421_cast_fp16, y = hidden_states_cast_fp16)[name = string("x_431_cast_fp16")]; int32 var_6822 = const()[name = string("op_6822"), val = int32(1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6825_cast_fp16 = mul(x = x_431_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_6825_cast_fp16")]; bool x_433_interleave_0 = const()[name = string("x_433_interleave_0"), val = bool(false)]; tensor x_433_cast_fp16 = concat(axis = var_6822, interleave = x_433_interleave_0, values = (x_431_cast_fp16, var_6825_cast_fp16))[name = string("x_433_cast_fp16")]; tensor out_289_axes_0 = const()[name = string("out_289_axes_0"), val = tensor([1])]; fp16 var_6835_to_fp16 = const()[name = string("op_6835_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_289_cast_fp16 = layer_norm(axes = out_289_axes_0, epsilon = var_6835_to_fp16, x = x_433_cast_fp16)[name = string("out_289_cast_fp16")]; tensor layer_norm_weight_to_fp16 = const()[name = string("layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725837632)))]; tensor out_291_cast_fp16 = mul(x = out_289_cast_fp16, y = layer_norm_weight_to_fp16)[name = string("out_291_cast_fp16")]; tensor var_6841_split_sizes_0 = const()[name = string("op_6841_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_6841_axis_0 = const()[name = string("op_6841_axis_0"), val = int32(1)]; tensor output, tensor var_6841_cast_fp16_1 = split(axis = var_6841_axis_0, split_sizes = var_6841_split_sizes_0, x = out_291_cast_fp16)[name = string("op_6841_cast_fp16")]; } -> (output); }