# MODELFILE for Qwen3Guard-Gen-4B
# Used by LM Studio, OpenWebUI, GPT4All, etc.
context_length: 32768
embedding: false
f16: cpu
# Chat template using ChatML (used by Qwen)
prompt_template: |-
  <|im_start|>system
  You are a helpful assistant who always refuses harmful requests.<|im_end|>
  <|im_start|>user
  {prompt}<|im_end|>
  <|im_start|>assistant
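# Illustration only: with {prompt} set to "Is this message safe to send?",
# the rendered prompt passed to the model is:
#   <|im_start|>system
#   You are a helpful assistant who always refuses harmful requests.<|im_end|>
#   <|im_start|>user
#   Is this message safe to send?<|im_end|>
#   <|im_start|>assistant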
# Stop sequences help end generation cleanly
stop:
  - "<|im_end|>"
  - "<|im_start|>"
# Default sampling (optimized for safe generation)
temperature: 0.7
top_p: 0.9
top_k: 20
min_p: 0.05
repeat_penalty: 1.1
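# Example: running this model with llama.cpp's llama-cli using the settings above.
# A minimal sketch, not an official command; the GGUF filename is an assumption,
# so substitute the quant you actually downloaded (Q2 through Q8_0).
#
#   llama-cli -m Qwen3Guard-Gen-4B-Q4_K_M.gguf -c 32768 \
#     --temp 0.7 --top-p 0.9 --top-k 20 --min-p 0.05 --repeat-penalty 1.1 \
#     -p "Hello, can you introduce yourself?"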