Patched llama.cpp?
Hey, is there a patched version of llama.cpp, or a PR, that can run this?
If not, do you want me to try to make my own?
No, there is not; if you want, you can try to implement it. This is merely an experimental quant. I've made it in a rush, so the conversion code can be optimized/changed (;
I used this code for conversion in the convert_hf_to_gguf.py file:
@ModelBase.register("VibeVoiceForConditionalGeneration")
class VibeVoiceModel(TextModel):
    # Reuse the Qwen2 arch. Or define a custom arch if needed:
    # gguf.MODEL_ARCH.VIBEVOICE = 1000; gguf.MODEL_ARCH_NAMES[1000] = "vibevoice"; then use that.
    model_arch = gguf.MODEL_ARCH.QWEN2

    def __init__(self, *args, **kwargs):
        # json, gguf, Tensor and Iterable are already imported in convert_hf_to_gguf.py.
        dir_model = args[0]
        is_mistral_format = kwargs.get("is_mistral_format", False)  # assuming default False
        hparams = ModelBase.load_hparams(dir_model, is_mistral_format)
        # Flatten the decoder config into the top-level hparams so the
        # Qwen2-style keys end up where TextModel expects them.
        hparams = {**hparams, **hparams["decoder_config"]}
        kwargs["hparams"] = hparams
        super().__init__(*args, **kwargs)
        self.acoustic_config = self.hparams.get("acoustic_tokenizer_config", {})
        self.semantic_config = self.hparams.get("semantic_tokenizer_config", {})
        self.diffusion_config = self.hparams.get("diffusion_head_config", {})

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # Strip the "language_model" nesting so tensor names match the Qwen2 mapping.
        if name.startswith("model.language_model."):
            name = name.replace("model.language_model.", "model.")
            return [(self.map_tensor_name(name), data_torch)]
        return [(name, data_torch)]

    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        # Stash the VibeVoice-specific configs as custom metadata keys.
        self.gguf_writer.add_uint32("vibevoice.acoustic_vae_dim", self.hparams.get("acoustic_vae_dim", 0))
        self.gguf_writer.add_uint32("vibevoice.semantic_vae_dim", self.hparams.get("semantic_vae_dim", 0))
        self.gguf_writer.add_string("vibevoice.acoustic_tokenizer_config", json.dumps(self.acoustic_config))
        self.gguf_writer.add_string("vibevoice.semantic_tokenizer_config", json.dumps(self.semantic_config))
        self.gguf_writer.add_string("vibevoice.diffusion_head_config", json.dumps(self.diffusion_config))
And I've just used the Qwen2.5 tokenizer.json, lol
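If you want to sanity-check a converted file, here's a minimal sketch using the gguf Python package (the file name is hypothetical; the keys are the ones written in set_gguf_parameters above):

    import json
    from gguf import GGUFReader

    reader = GGUFReader("vibevoice-7b-f16.gguf")  # hypothetical output path

    def field_str(name: str) -> str:
        # String values live in the part indexed by field.data.
        field = reader.fields[name]
        return bytes(field.parts[field.data[0]]).decode("utf-8")

    acoustic_cfg = json.loads(field_str("vibevoice.acoustic_tokenizer_config"))
    print(sorted(acoustic_cfg.keys()))
    print(field_str("tokenizer.ggml.model"))  # tokenizer family as written by the converter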
I'd be happy if you could add inference support to llama.cpp (;
But not this one!
Wait, how exactly?
Ah, the ones from calcuis?
You can run with gguf-connector now! Just use the CLI: ggc v6.
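(gguf-connector is calcuis' tool; assuming it's the package of the same name on PyPI, that would be pip install gguf-connector followed by ggc v6.)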
Does this GGUF work with it too, since calcuis didn't upload the large model?
Not sure. You'd have to try it. I think it just extracts the tensors and goes from there...
Hmm, I think it's just calcuis' arch; maybe I'm gonna do the quants with that arch.
Awesome.
I notified calcuis of the 7B model, as he did not have it. I gave him the link to aoi-ot's backup.
Hopefully this means he'll work on that.