Good effort
#1
by
PsiPi
- opened
tokenizer.chat_template made me lol ;)
Yeah, it's the standard Qwen2 one.
I just made it use the normal Qwen2.5 config, since the official repo is missing that part and I didn't want to reinvent the wheel when 90% of the model is the Qwen2 architecture anyway.
That's the class in the conversion script I used, by the way:
@ModelBase.register("VibeVoiceForConditionalGeneration")
class VibeVoiceModel(TextModel):
    """Converter for ``VibeVoiceForConditionalGeneration`` checkpoints.

    The class is registered under the Qwen2 architecture constant. Tensors
    whose names start with ``model.language_model.`` are renamed and mapped;
    all other tensors are passed through under their original names. The
    acoustic/semantic tokenizer and diffusion-head sub-configs are written
    out as JSON-encoded metadata strings.
    """

    # Or define a custom arch if needed:
    #   gguf.MODEL_ARCH.VIBEVOICE = 1000
    #   gguf.MODEL_ARCH_NAMES[1000] = "vibevoice"
    # and then use that here instead of QWEN2.
    model_arch = gguf.MODEL_ARCH.QWEN2

    def __init__(self, *args, **kwargs):
        """Load hparams, flatten ``decoder_config`` into them, and init the base.

        The first positional argument is used as the model directory. The
        merged hparams are handed to the base class through the ``hparams``
        keyword, replacing any value the caller supplied under that key.

        Raises:
            IndexError: if no positional argument is given.
            KeyError: if the loaded hparams have no ``decoder_config`` entry.
        """
        dir_model = args[0]
        # Assumed to default to False when the caller does not pass it.
        is_mistral_format = kwargs.get("is_mistral_format", False)
        hparams = ModelBase.load_hparams(dir_model, is_mistral_format)
        # Hoist the decoder settings to the top level; on a key clash the
        # decoder_config value wins because it is unpacked last.
        hparams = {**hparams, **hparams["decoder_config"]}
        kwargs["hparams"] = hparams
        super().__init__(*args, **kwargs)

        # Sub-configs default to an empty dict so json.dumps() in
        # set_gguf_parameters always has something serialisable.
        self.acoustic_config = self.hparams.get("acoustic_tokenizer_config", {})
        self.semantic_config = self.hparams.get("semantic_tokenizer_config", {})
        self.diffusion_config = self.hparams.get("diffusion_head_config", {})

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Return the ``(output_name, tensor)`` pairs for one input tensor.

        Names starting with ``model.language_model.`` have that prefix
        rewritten to ``model.`` and are then passed through
        ``self.map_tensor_name``. Every other tensor is returned unchanged
        under its original name. ``bid`` is not used here.
        """
        lm_prefix = "model.language_model."
        if name.startswith(lm_prefix):
            # Rewrite only the leading prefix; an unbounded str.replace would
            # also touch any later occurrence of the same substring.
            name = "model." + name[len(lm_prefix):]
            return [(self.map_tensor_name(name), data_torch)]
        return [(name, data_torch)]

    def set_gguf_parameters(self):
        """Write the base parameters, then the VibeVoice-specific metadata."""
        super().set_gguf_parameters()
        # VAE dimensions fall back to 0 when absent from hparams.
        self.gguf_writer.add_uint32("vibevoice.acoustic_vae_dim", self.hparams.get("acoustic_vae_dim", 0))
        self.gguf_writer.add_uint32("vibevoice.semantic_vae_dim", self.hparams.get("semantic_vae_dim", 0))
        # Nested sub-configs are stored verbatim as JSON strings.
        self.gguf_writer.add_string("vibevoice.acoustic_tokenizer_config", json.dumps(self.acoustic_config))
        self.gguf_writer.add_string("vibevoice.semantic_tokenizer_config", json.dumps(self.semantic_config))
        self.gguf_writer.add_string("vibevoice.diffusion_head_config", json.dumps(self.diffusion_config))
I would 100% have been tempted to put "You are VibeVoice" in there just for giggles ;)
funny
PsiPi
changed discussion status to
closed
I rushed it a bit, so if you have anything that can be added or changed, don't hesitate to tell me.