from ctransformers import AutoModelForCausalLM
import gradio as gr

# Load the quantized CodeLlama 7B Instruct model (GGUF) directly from Hugging Face
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/CodeLlama-7B-Instruct-GGUF",
    model_file="codellama-7b-instruct.Q4_K_M.gguf",  # lightweight 4-bit quantization
    model_type="llama"
)

def chat(message, history):
    # history is ignored: each message is answered independently
    prompt = f"### Instruction:\n{message}\n### Response:\n"
    response = llm(prompt, max_new_tokens=512)
    return response

# Same layout as before (chat style)
iface = gr.ChatInterface(
    fn=chat,
    title="My CodeLlama ChatBot",
    description="Programming assistant running on Hugging Face Spaces (CodeLlama-7B Instruct).",
    theme="soft"
)

iface.launch()
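
# Optional streaming variant (a sketch, not part of the original app: it assumes
# ctransformers' stream=True generator mode and Gradio ChatInterface's support for
# generator callbacks). To try it, define chat_stream above the interface and pass
# fn=chat_stream instead of fn=chat so tokens render as they are produced:
#
# def chat_stream(message, history):
#     prompt = f"### Instruction:\n{message}\n### Response:\n"
#     partial = ""
#     for token in llm(prompt, max_new_tokens=512, stream=True):
#         partial += token
#         yield partial  # Gradio re-renders the reply with each partial string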