numb3r3 committed on
Commit
9cfeff2
·
verified ·
1 Parent(s): 2f894e6

fix: empty passage

Browse files
Files changed (1) hide show
  1. modeling_xlm_roberta.py +2 -2
modeling_xlm_roberta.py CHANGED
@@ -1056,9 +1056,9 @@ def reranker_tokenize_preproc(
1056
  chunk1['input_ids'].append(sep_id)
1057
  chunk1['input_ids'].extend(chunk2['input_ids'])
1058
  chunk1['input_ids'].append(sep_id)
1059
- chunk1['attention_mask'].append(chunk2['attention_mask'][0])
1060
  chunk1['attention_mask'].extend(chunk2['attention_mask'])
1061
- chunk1['attention_mask'].append(chunk2['attention_mask'][-1])
1062
  if 'token_type_ids' in chunk1:
1063
  token_type_ids = [1 for _ in range(len(chunk2['token_type_ids']) + 2)]
1064
  chunk1['token_type_ids'].extend(token_type_ids)
 
1056
  chunk1['input_ids'].append(sep_id)
1057
  chunk1['input_ids'].extend(chunk2['input_ids'])
1058
  chunk1['input_ids'].append(sep_id)
1059
+ chunk1['attention_mask'].append(1)
1060
  chunk1['attention_mask'].extend(chunk2['attention_mask'])
1061
+ chunk1['attention_mask'].append(1)
1062
  if 'token_type_ids' in chunk1:
1063
  token_type_ids = [1 for _ in range(len(chunk2['token_type_ids']) + 2)]
1064
  chunk1['token_type_ids'].extend(token_type_ids)