# MODELFILE for Qwen3Guard-Gen-4B
# Used by LM Studio, OpenWebUI, GPT4All, etc.
context_length: 32768
embedding: false
f16: cpu
# Chat template using ChatML (used by Qwen)
prompt_template: |-
  <|im_start|>system
  You are a helpful assistant who always refuses harmful requests.<|im_end|>
  <|im_start|>user
  {prompt}<|im_end|>
  <|im_start|>assistant
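# Illustration only: with {prompt} set to "Is this message safe to send?",
# the rendered prompt passed to the model is:
#   <|im_start|>system
#   You are a helpful assistant who always refuses harmful requests.<|im_end|>
#   <|im_start|>user
#   Is this message safe to send?<|im_end|>
#   <|im_start|>assistant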
# Stop sequences help end generation cleanly
stop:
  - "<|im_end|>"
  - "<|im_start|>"
# Default sampling (optimized for safe generation)
temperature: 0.7
top_p: 0.9
top_k: 20
min_p: 0.05
repeat_penalty: 1.1
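# Example: running this model with llama.cpp's llama-cli using the settings above.
# A minimal sketch, not an official command; the GGUF filename is an assumption,
# so substitute the quant you actually downloaded (Q2 through Q8_0).
#
#   llama-cli -m Qwen3Guard-Gen-4B-Q4_K_M.gguf -c 32768 \
#     --temp 0.7 --top-p 0.9 --top-k 20 --min-p 0.05 --repeat-penalty 1.1 \
#     -p "Hello, can you introduce yourself?"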