Sakalti committed on
Commit 8087873 · verified · 1 Parent(s): 1a51f1e

Create app.py

Files changed (1)
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import gradio as gr
+ from llama_cpp import Llama
+ import requests
+ from tqdm import tqdm
+
+ # Model file URL (Saka-14B GGUF, Q4_K_M quantization)
+ MODEL_URL = "https://huggingface.co/mradermacher/Saka-14B-GGUF/resolve/main/Saka-14B.Q4_K_M.gguf"
+ # Local path to store the downloaded model (directory and filename assumed)
+ MODEL_PATH = "models/Saka-14B.Q4_K_M.gguf"
+
+ def download_model(url=MODEL_URL, path=MODEL_PATH):
+     os.makedirs(os.path.dirname(path), exist_ok=True)
+     if os.path.exists(path):
+         print("モデルファイルは既に存在します。")
+         return
+     print(f"モデルをダウンロード中: {url}")
+     response = requests.get(url, stream=True)
+     total = int(response.headers.get('content-length', 0))
+     with open(path, 'wb') as file, tqdm(
+         desc=path,
+         total=total,
+         unit='iB',
+         unit_scale=True,
+         unit_divisor=1024,
+     ) as bar:
+         for data in response.iter_content(chunk_size=1024):
+             size = file.write(data)
+             bar.update(size)
+     print("モデルのダウンロードが完了しました。")
+
+ # Download the model (on first launch)
+ download_model()
+
+ # Load the model
+ llm = Llama(model_path=MODEL_PATH)
+
+ def generate_response(prompt, temperature, top_p, max_tokens):
+     response = llm.create_completion(
+         prompt=prompt,
+         temperature=temperature,
+         top_p=top_p,
+         max_tokens=max_tokens,
+         stop=["\n\n"]
+     )
+     # create_completion returns a dict, so index it rather than using attribute access
+     return response["choices"][0]["text"].strip()
+
+ def chat_interface(user_input, history, temperature, top_p, max_tokens):
+     if history is None:
+         history = []
+     # Rebuild the prompt from previous (user, assistant) turns
+     prompt = ""
+     for user_msg, ai_msg in history:
+         prompt += f"ユーザー: {user_msg}\nAI: {ai_msg}\n"
+     prompt += f"ユーザー: {user_input}\nAI: "
+     response = generate_response(prompt, temperature, top_p, max_tokens)
+     # Store turns as (user, assistant) pairs so gr.Chatbot renders them correctly
+     history.append((user_input, response))
+     return history, history
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Saka GGUF 日本語チャット")
+     chatbot = gr.Chatbot()
+     user_input = gr.Textbox(placeholder="質問をどうぞ", label="あなたの入力")
+
+     temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature(創造性)")
+     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p(確率の上位何%から生成するか)")
+     max_tokens = gr.Slider(minimum=16, maximum=1024, value=256, step=16, label="最大トークン数")
+
+     history = gr.State([])
+
+     submit_btn = gr.Button("送信")
+     submit_btn.click(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
+
+     user_input.submit(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
+
+ demo.launch()
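
Running this as a Hugging Face Space also requires the packages imported above to be declared. A minimal requirements.txt sketch, assuming only the dependencies visible in the file (llama-cpp-python provides the llama_cpp module); versions left unpinned:

    gradio
    llama-cpp-python
    requests
    tqdm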