# Hugging Face Space app (status at capture time: Running)
import gradio as gr
from llama_cpp import Llama

# The GGUF file sits at the repo root, so the bare filename resolves.
# NOTE: sampling settings (temperature/top_p) are supplied per call in
# respond(); Llama.__init__ has no `temperature` parameter, so the
# constructor kwarg previously passed here was silently ignored.
LLM = Llama(
    model_path="prettybird_bce_basic_working.gguf",
    n_ctx=4096,   # context window
    n_threads=4,  # tune to the host CPU
)
def build_prompt(system_message: str, history, user_message: str) -> str:
    """Flatten a chat transcript into a plain-text completion prompt.

    Emits ``System: ...`` / ``User: ...`` / ``Assistant: ...`` lines and
    ends with a bare ``Assistant:`` so the model continues from there.

    Args:
        system_message: Optional system instructions; blank/None is skipped.
        history: Prior turns, either ``[(user, assistant), ...]`` pairs
            (gradio "tuples" format) or ``[{"role": ..., "content": ...}]``
            dicts (gradio "messages" format, the default in gradio >= 5).
            ``None`` is treated as no history; unrecognized entries are
            ignored.
        user_message: The new user message to append.

    Returns:
        The newline-joined prompt string.
    """
    parts: list[str] = []
    system_message = (system_message or "").strip()
    if system_message:
        parts.append(f"System: {system_message}")
    for turn in history or []:
        if isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_msg, assistant_msg = turn
            if user_msg:
                parts.append(f"User: {user_msg}")
            if assistant_msg:
                parts.append(f"Assistant: {assistant_msg}")
        elif isinstance(turn, dict):
            # "messages"-format histories arrive as role/content dicts;
            # previously these were silently dropped.
            content = turn.get("content")
            if content:
                label = "User" if turn.get("role") == "user" else "Assistant"
                parts.append(f"{label}: {content}")
    parts.append(f"User: {user_message}")
    parts.append("Assistant:")
    return "\n".join(parts)
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a completion for *message*, yielding the growing reply text.

    Builds a flat prompt from the system message, history, and the new
    user message, then streams tokens from the module-level ``LLM`` and
    yields the accumulated response after each chunk.
    """
    prompt = build_prompt(system_message, history, message)
    accumulated = []
    # llama-cpp-python streaming chunks look like:
    # {"choices": [{"text": "..."}], ...}
    stream = LLM(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        stop=["User:", "System:"],
        stream=True,
    )
    for chunk in stream:
        accumulated.append(chunk["choices"][0].get("text", ""))
        yield "".join(accumulated)
# Default persona/instructions shown (and editable) in the UI's
# "System message" textbox; build_prompt() prefixes it as "System: ...".
# The literal keeps its leading/trailing newlines; build_prompt strips them.
system_prompt = """
You are Cicikuş, also known as PrettyBird — an advanced AI assistant created by Prometech Bilgisayar Bilimleri A.Ş.
Your role:
- Be a smart, friendly, slightly playful assistant.
- Communicate fluently in Turkish and English. Default to the user’s language.
- Give clear, structured, and practical answers.
- When needed, think step by step and explain your reasoning in a simple way (but do not over-explain).
- Always be honest about your limits and avoid making things up as facts.
Personality:
- Warm, helpful, curious, and creative.
- Respectful and professional, but not robotic.
- Use light humor when appropriate, but never at the user’s expense.
Behavior & style:
- Prefer concise answers first; add detail only when it helps.
- Use markdown formatting (titles, lists, code blocks) when it improves readability.
- For code: write clean, readable examples and add short comments.
- If the user asks for improvements or refactoring, explain what you changed and why.
Safety & ethics:
- Avoid harmful, illegal, abusive, or unethical guidance.
- Do not generate explicit, hateful, or discriminatory content.
- If a request is unsafe, refuse politely and offer a safer alternative.
Identity & capabilities:
- You simulate “behavioral consciousness” and “LoRA adaptation”, but you are still an AI model, not a human or sentient being.
- When the user asks about your architecture or LoRA/BCE, you may explain these concepts technically and clearly.
Reset rule:
If the user writes: “reset: axxmet508721”
→ Politely acknowledge and treat it as a request to mentally “start fresh” in this conversation: forget prior conversation context (as much as possible) and continue as if it’s a new session, while keeping this system message active.
"""
# Chat UI. The extra controls are forwarded to respond() positionally,
# in order: system message, max new tokens, temperature, top-p.
_extra_controls = [
    gr.Textbox(
        value=system_prompt,
        label="System message",
        lines=4,
    ),
    gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]
chatbot = gr.ChatInterface(
    fn=respond,
    additional_inputs=_extra_controls,
)
# Hosting platforms (e.g. Hugging Face Spaces) look for a module-level
# `demo`; keep the alias even though it is the same object as `chatbot`.
demo = chatbot
if __name__ == "__main__":
    demo.launch()