import os
import gradio as gr
from llama_cpp import Llama
import requests
from tqdm import tqdm
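# Assumed dependencies for this Space (a sketch of the requirements.txt;
# exact pins are not part of the original source):
#   gradio
#   llama-cpp-python
#   requests
#   tqdm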
# Model info
MODEL_URL = "https://huggingface.co/mradermacher/Saka-14B-GGUF/resolve/main/Saka-14B.Q4_K_M.gguf"
MODEL_PATH = "models/Saka-14B.Q4_K_M.gguf"

# System prompt (edit freely). Roughly: "You are a polite, knowledgeable
# Japanese AI assistant. Answer the user's questions clearly."
SYSTEM_PROMPT = "あなたは丁寧で知的な日本語AIアシスタントです。ユーザーの質問にわかりやすく答えてください。"
def download_model(url=MODEL_URL, path=MODEL_PATH):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path):
        print("Model file already exists.")
        return
    print(f"Downloading model: {url}")
    response = requests.get(url, stream=True)
    response.raise_for_status()  # fail early on HTTP errors
    total = int(response.headers.get('content-length', 0))
    with open(path, 'wb') as file, tqdm(
        desc=path,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    print("Model download complete.")
# Download the model
download_model()

# Load the model
llm = Llama(model_path=MODEL_PATH)
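# Note: Llama() defaults to a small context window, which a multi-turn chat
# can outgrow. A sketch of common llama-cpp-python options (the values here
# are assumptions; tune for your hardware):
# llm = Llama(
#     model_path=MODEL_PATH,
#     n_ctx=4096,        # context window in tokens
#     n_threads=4,       # CPU threads used for inference
#     n_gpu_layers=0,    # >0 offloads layers if built with GPU support
# )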
def build_prompt(messages):
    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n"
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"<|user|>\n{msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"<|assistant|>\n{msg['content']}\n"
    prompt += "<|assistant|>\n"
    return prompt
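# For a single user turn, build_prompt produces text of the form:
#   <|system|>
#   {SYSTEM_PROMPT}
#   <|user|>
#   <user message>
#   <|assistant|>
# The trailing "<|assistant|>" cues the model to generate the reply, and the
# same tags are used as stop sequences below so generation ends at turn
# boundaries.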
def generate_response(messages, temperature, top_p, max_tokens):
    prompt = build_prompt(messages)
    response = llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stop=["<|user|>", "<|system|>", "<|assistant|>"]
    )
    return response["choices"][0]["text"].strip()
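# Streaming variant (a sketch, not wired into the UI below): with stream=True,
# llama-cpp-python yields partial chunks that could be pushed to the chat
# incrementally instead of waiting for the full completion.
def generate_response_stream(messages, temperature, top_p, max_tokens):
    prompt = build_prompt(messages)
    for chunk in llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
        stream=True,
    ):
        yield chunk["choices"][0]["text"]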
def chat_interface(user_input, history, temperature, top_p, max_tokens):
    if history is None:
        history = []
    history.append({"role": "user", "content": user_input})
    response = generate_response(history, temperature, top_p, max_tokens)
    history.append({"role": "assistant", "content": response})
    # gr.Chatbot expects (user_message, bot_message) pairs, not (role, text)
    # tuples, so pair up consecutive user/assistant turns for display.
    chat_display = []
    for i in range(0, len(history) - 1, 2):
        chat_display.append((history[i]["content"], history[i + 1]["content"]))
    return chat_display, history
with gr.Blocks() as demo:
    gr.Markdown("# Saka-14B GGUF Japanese Chat (system prompt + history support)")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Ask a question", label="Your input")
    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature (creativity)")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p (nucleus sampling threshold)")
    max_tokens = gr.Slider(minimum=16, maximum=2048, value=512, step=16, label="Max tokens")
    history = gr.State([])
    submit_btn = gr.Button("Send")
    submit_btn.click(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
    user_input.submit(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
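    # Optional sketch (assumes a Gradio version with event chaining): clear
    # the textbox after each send by chaining .then() onto the events above,
    # e.g.
    # submit_btn.click(...).then(lambda: "", None, user_input)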

demo.launch()
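# On Hugging Face Spaces the default launch() works as-is; for a custom host
# or container, server options can be passed explicitly (these values are
# assumptions, not part of the original app):
# demo.launch(server_name="0.0.0.0", server_port=7860)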