Avoid duplicate input kwargs in `_decode`
#28 by HwwwH - opened
- modeling_minicpmo.py +6 -0
modeling_minicpmo.py
CHANGED
|
@@ -636,6 +636,8 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
| 636 |
return self.llm(input_ids=None, position_ids=position_ids, inputs_embeds=vllm_embedding, **kwargs)
|
| 637 |
|
| 638 |
def _decode(self, inputs_embeds, tokenizer, attention_mask, **kwargs):
|
|
|
|
|
|
|
| 639 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
| 640 |
outputs = self.llm.generate(
|
| 641 |
inputs_embeds=inputs_embeds,
|
|
@@ -777,6 +779,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
| 777 |
tokenizer=None,
|
| 778 |
vision_hidden_states=None,
|
| 779 |
stream=False,
|
|
|
|
| 780 |
**kwargs,
|
| 781 |
):
|
| 782 |
assert input_ids is not None
|
|
@@ -815,6 +818,9 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
| 815 |
|
| 816 |
result = self._decode_text(outputs.sequences, tokenizer)
|
| 817 |
|
|
|
|
|
|
|
|
|
|
| 818 |
return result, outputs
|
| 819 |
|
| 820 |
def chat(
|
|
|
|
| 636 |
return self.llm(input_ids=None, position_ids=position_ids, inputs_embeds=vllm_embedding, **kwargs)
|
| 637 |
|
| 638 |
def _decode(self, inputs_embeds, tokenizer, attention_mask, **kwargs):
|
| 639 |
+
kwargs.pop("output_hidden_states", None)
|
| 640 |
+
kwargs.pop("return_dict_in_generate", None)
|
| 641 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
| 642 |
outputs = self.llm.generate(
|
| 643 |
inputs_embeds=inputs_embeds,
|
|
|
|
| 779 |
tokenizer=None,
|
| 780 |
vision_hidden_states=None,
|
| 781 |
stream=False,
|
| 782 |
+
decode_text=True,
|
| 783 |
**kwargs,
|
| 784 |
):
|
| 785 |
assert input_ids is not None
|
|
|
|
| 818 |
|
| 819 |
result = self._decode_text(outputs.sequences, tokenizer)
|
| 820 |
|
| 821 |
+
if decode_text is False:
|
| 822 |
+
return outputs
|
| 823 |
+
|
| 824 |
return result, outputs
|
| 825 |
|
| 826 |
def chat(
|