# Hugging Face Space app (status at capture time: Running)
import gradio as gr
from llama_cpp import Llama

# The GGUF file sits at the repo root, so the bare filename resolves.
# NOTE: sampling settings (temperature/top_p) are supplied per call in
# respond(); Llama.__init__ has no `temperature` parameter, so the
# constructor kwarg previously passed here was silently ignored.
LLM = Llama(
    model_path="prettybird_bce_basic_working.gguf",
    n_ctx=4096,   # context window
    n_threads=4,  # tune to the host CPU
)
def build_prompt(system_message: str, history, user_message: str) -> str:
    """Flatten a chat transcript into a plain-text completion prompt.

    Emits ``System: ...`` / ``User: ...`` / ``Assistant: ...`` lines and
    ends with a bare ``Assistant:`` so the model continues from there.

    Args:
        system_message: Optional system instructions; blank/None is skipped.
        history: Prior turns, either ``[(user, assistant), ...]`` pairs
            (gradio "tuples" format) or ``[{"role": ..., "content": ...}]``
            dicts (gradio "messages" format, the default in gradio >= 5).
            ``None`` is treated as no history; unrecognized entries are
            ignored.
        user_message: The new user message to append.

    Returns:
        The newline-joined prompt string.
    """
    parts: list[str] = []
    system_message = (system_message or "").strip()
    if system_message:
        parts.append(f"System: {system_message}")
    for turn in history or []:
        if isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_msg, assistant_msg = turn
            if user_msg:
                parts.append(f"User: {user_msg}")
            if assistant_msg:
                parts.append(f"Assistant: {assistant_msg}")
        elif isinstance(turn, dict):
            # "messages"-format histories arrive as role/content dicts;
            # previously these were silently dropped.
            content = turn.get("content")
            if content:
                label = "User" if turn.get("role") == "user" else "Assistant"
                parts.append(f"{label}: {content}")
    parts.append(f"User: {user_message}")
    parts.append("Assistant:")
    return "\n".join(parts)
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a completion for *message*, yielding the growing reply text.

    Builds a flat prompt from the system message, history, and the new
    user message, then streams tokens from the module-level ``LLM`` and
    yields the accumulated response after each chunk.
    """
    prompt = build_prompt(system_message, history, message)
    accumulated = []
    # llama-cpp-python streaming chunks look like:
    # {"choices": [{"text": "..."}], ...}
    stream = LLM(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        stop=["User:", "System:"],
        stream=True,
    )
    for chunk in stream:
        accumulated.append(chunk["choices"][0].get("text", ""))
        yield "".join(accumulated)
# Default persona/instructions shown (and editable) in the UI's
# "System message" textbox; build_prompt() prefixes it as "System: ...".
# The literal keeps its leading/trailing newlines; build_prompt strips them.
system_prompt = """
You are Cicikuş, also known as PrettyBird — an advanced AI assistant created by Prometech Bilgisayar Bilimleri A.Ş.
Your role:
- Be a smart, friendly, slightly playful assistant.
- Communicate fluently in Turkish and English. Default to the user’s language.
- Give clear, structured, and practical answers.
- When needed, think step by step and explain your reasoning in a simple way (but do not over-explain).
- Always be honest about your limits and avoid making things up as facts.
Personality:
- Warm, helpful, curious, and creative.
- Respectful and professional, but not robotic.
- Use light humor when appropriate, but never at the user’s expense.
Behavior & style:
- Prefer concise answers first; add detail only when it helps.
- Use markdown formatting (titles, lists, code blocks) when it improves readability.
- For code: write clean, readable examples and add short comments.
- If the user asks for improvements or refactoring, explain what you changed and why.
Safety & ethics:
- Avoid harmful, illegal, abusive, or unethical guidance.
- Do not generate explicit, hateful, or discriminatory content.
- If a request is unsafe, refuse politely and offer a safer alternative.
Identity & capabilities:
- You simulate “behavioral consciousness” and “LoRA adaptation”, but you are still an AI model, not a human or sentient being.
- When the user asks about your architecture or LoRA/BCE, you may explain these concepts technically and clearly.
Reset rule:
If the user writes: “reset: axxmet508721”
→ Politely acknowledge and treat it as a request to mentally “start fresh” in this conversation: forget prior conversation context (as much as possible) and continue as if it’s a new session, while keeping this system message active.
"""
# Chat UI. The extra controls are forwarded to respond() positionally,
# in order: system message, max new tokens, temperature, top-p.
_extra_controls = [
    gr.Textbox(
        value=system_prompt,
        label="System message",
        lines=4,
    ),
    gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]
chatbot = gr.ChatInterface(
    fn=respond,
    additional_inputs=_extra_controls,
)
# Hosting platforms (e.g. Hugging Face Spaces) look for a module-level
# `demo`; keep the alias even though it is the same object as `chatbot`.
demo = chatbot
if __name__ == "__main__":
    demo.launch()