Create app.py
app.py ADDED
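A minimal Gradio chat Space: on first launch it downloads the Saka-14B Q4_K_M GGUF weights, loads them with llama-cpp-python, and serves a Japanese chat UI with sliders for temperature, top-p, and max tokens.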
import os
import gradio as gr
from llama_cpp import Llama
import requests
from tqdm import tqdm

# Model file URL (Saka-14B-GGUF)
MODEL_URL = "https://huggingface.co/mradermacher/Saka-14B-GGUF/resolve/main/Saka-14B.Q4_K_M.gguf"
# Local path for the downloaded weights. The original referenced MODEL_PATH
# without defining it; this value is an assumption derived from the URL's filename.
MODEL_PATH = "models/Saka-14B.Q4_K_M.gguf"

def download_model(url=MODEL_URL, path=MODEL_PATH):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path):
        print("Model file already exists.")
        return
    print(f"Downloading model: {url}")
    response = requests.get(url, stream=True)
    total = int(response.headers.get('content-length', 0))
    # Stream the file to disk in 1 KiB chunks, with a tqdm progress bar
    with open(path, 'wb') as file, tqdm(
        desc=path,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    print("Model download complete.")

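# An alternative to the manual downloader above is huggingface_hub, which
# adds caching and resume support. A sketch, not wired in here; repo_id and
# filename are read off MODEL_URL:
#
#   from huggingface_hub import hf_hub_download
#   MODEL_PATH = hf_hub_download(
#       repo_id="mradermacher/Saka-14B-GGUF",
#       filename="Saka-14B.Q4_K_M.gguf",
#   )
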
# Download the model on first launch
download_model()

# Load the model
llm = Llama(model_path=MODEL_PATH)

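# Llama() above uses library defaults; llama-cpp-python also exposes tuning
# parameters such as n_ctx (context window size) and n_threads. A sketch for
# a CPU-only Space (the values are assumptions, not from the original):
#
#   llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=os.cpu_count())
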
def generate_response(prompt, temperature, top_p, max_tokens):
    response = llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stop=["\n\n"]
    )
    # create_completion returns a dict; the original attribute-style access
    # (response.choices[0].text) would raise AttributeError.
    return response["choices"][0]["text"].strip()

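# create_completion can also stream partial output: with stream=True it
# yields chunk dicts instead of a single response. A sketch of a streaming
# variant (a hypothetical helper, not used by the UI below):
#
#   def generate_response_stream(prompt, temperature, top_p, max_tokens):
#       for chunk in llm.create_completion(
#           prompt=prompt, temperature=temperature, top_p=top_p,
#           max_tokens=max_tokens, stop=["\n\n"], stream=True,
#       ):
#           yield chunk["choices"][0]["text"]
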
def chat_interface(user_input, history, temperature, top_p, max_tokens):
    if history is None:
        history = []
    # Flatten prior (user, AI) turns into a plain transcript prompt
    prompt = ""
    for user_msg, ai_msg in history:
        prompt += f"ユーザー: {user_msg}\nAI: {ai_msg}\n"
    prompt += f"ユーザー: {user_input}\nAI: "
    response = generate_response(prompt, temperature, top_p, max_tokens)
    # gr.Chatbot expects (user message, bot reply) pairs; the original stored
    # (speaker label, text) tuples, which would render the labels as messages.
    history.append((user_input, response))
    return history, history

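# For example, with one completed exchange in history and a new user
# message, the assembled prompt looks like (placeholders, not real output):
#
#   ユーザー: <first message>
#   AI: <first reply>
#   ユーザー: <new message>
#   AI: 
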
with gr.Blocks() as demo:
    # UI labels are in Japanese, matching the model's target language
    gr.Markdown("# Saka GGUF 日本語チャット")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="質問をどうぞ", label="あなたの入力")

    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature(創造性)")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p(確率の上位何%から生成するか)")
    max_tokens = gr.Slider(minimum=16, maximum=1024, value=256, step=16, label="最大トークン数")

    history = gr.State([])

    submit_btn = gr.Button("送信")
    submit_btn.click(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
    # Pressing Enter in the textbox triggers the same handler as the button
    user_input.submit(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])

demo.launch()
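To try the Space locally (an assumption based on the imports above, since no requirements file is shown): install gradio, llama-cpp-python, requests, and tqdm, then run python app.py. Note that the first launch blocks on downloading the multi-gigabyte GGUF file before the UI starts.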