Good effort

#1
by PsiPi - opened

tokenizer.chat_template made me lol ;)

Yeah, it's the standard Qwen2 one.
I just made it use the normal Qwen2.5 config, since the official repo is missing that part and I didn't want to reinvent the wheel when 90% of the model is the Qwen2 architecture anyway 😅

That's the class in the conversion script I used, btw:



@ModelBase.register("VibeVoiceForConditionalGeneration")
class VibeVoiceModel(TextModel):
    """GGUF converter for ``VibeVoiceForConditionalGeneration`` checkpoints.

    The checkpoint is exported using the Qwen2 architecture. The hyperparameters
    found under ``decoder_config`` are flattened into the top-level
    hyperparameters before the base class is initialised, and the acoustic
    tokenizer, semantic tokenizer and diffusion head configurations are stored
    in the GGUF metadata as JSON strings under ``vibevoice.*`` keys.
    """

    # Exported as plain Qwen2. A dedicated architecture could be defined
    # instead if needed, e.g. gguf.MODEL_ARCH.VIBEVOICE = 1000 and
    # gguf.MODEL_ARCH_NAMES[1000] = "vibevoice", and then used here.
    model_arch = gguf.MODEL_ARCH.QWEN2

    def __init__(self, *args, **kwargs):
        """Load the hyperparameters, flatten ``decoder_config`` and initialise.

        Args:
            *args: Forwarded unchanged to the base class; ``args[0]`` is used
                as the model directory.
            **kwargs: Forwarded to the base class. ``is_mistral_format`` is
                read (default ``False``) and ``hparams`` is overwritten with
                the flattened hyperparameters.

        Raises:
            KeyError: If the loaded hyperparameters have no ``decoder_config``
                section.
        """
        dir_model = args[0]
        is_mistral_format = kwargs.get("is_mistral_format", False)
        hparams = ModelBase.load_hparams(dir_model, is_mistral_format)

        decoder_config = hparams.get("decoder_config")
        if decoder_config is None:
            # Same exception type as a plain subscript, but with a message
            # that says which section of the config is missing.
            raise KeyError(
                "decoder_config is missing from the model hyperparameters; "
                "cannot build the VibeVoice language model configuration"
            )

        # Keys from decoder_config take precedence over top-level keys.
        kwargs["hparams"] = {**hparams, **decoder_config}
        super().__init__(*args, **kwargs)

        self.acoustic_config = self.hparams.get("acoustic_tokenizer_config", {})
        self.semantic_config = self.hparams.get("semantic_tokenizer_config", {})
        self.diffusion_config = self.hparams.get("diffusion_head_config", {})

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Return the output name(s) and data for a single checkpoint tensor.

        Tensors whose name starts with ``model.language_model.`` have that
        prefix rewritten to ``model.`` and are then passed through
        ``map_tensor_name``. Every other tensor is returned with its original
        name. ``bid`` is not used.
        """
        prefix = "model.language_model."
        if name.startswith(prefix):
            # Rewrite the leading prefix only; a later occurrence of the same
            # substring inside the name must be left untouched.
            name = name.replace(prefix, "model.", 1)
            return [(self.map_tensor_name(name), data_torch)]
        return [(name, data_torch)]

    def set_gguf_parameters(self):
        """Write the base parameters followed by the VibeVoice metadata.

        The VAE dimensions default to 0 when absent from the hyperparameters;
        the three sub-configurations are serialised with ``json.dumps``.
        """
        super().set_gguf_parameters()
        writer = self.gguf_writer
        writer.add_uint32("vibevoice.acoustic_vae_dim", self.hparams.get("acoustic_vae_dim", 0))
        writer.add_uint32("vibevoice.semantic_vae_dim", self.hparams.get("semantic_vae_dim", 0))
        writer.add_string("vibevoice.acoustic_tokenizer_config", json.dumps(self.acoustic_config))
        writer.add_string("vibevoice.semantic_tokenizer_config", json.dumps(self.semantic_config))
        writer.add_string("vibevoice.diffusion_head_config", json.dumps(self.diffusion_config))

I would 100% have been tempted to put you are "VibeVoice" just for giggles ;)
funny

PsiPi changed discussion status to closed

I rushed it a bit, so if there is anything that can be added or changed, don't hesitate to tell me 😅

Sign up or log in to comment