Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,12 +12,11 @@ model_path = hf_hub_download(model_name, filename=model_file)
|
|
| 12 |
llm = Llama(
|
| 13 |
model_path=model_path,
|
| 14 |
n_ctx=2048, # Context length to use
|
| 15 |
-
|
| 16 |
# n_gpu_layers=0 # Number of model layers to offload to GPU
|
| 17 |
# chat_format="chatml",
|
| 18 |
verbose=False
|
| 19 |
-
|
| 20 |
-
)
|
| 21 |
|
| 22 |
prompt_template="""<|begin_of_text|>Dibawah ini adalah percakapan antara dokter dengan pasiennya yang ingin berkonsultasi terkait kesehatan. Tuliskan jawaban yang tepat dan lengkap sesuai sesuai pertanyaan dari pasien.<|end_of_text|>
|
| 23 |
|
|
@@ -49,8 +48,8 @@ def output_inference(tanya, history):
|
|
| 49 |
yield temp
|
| 50 |
|
| 51 |
history = ["init", prompt]
|
| 52 |
-
|
| 53 |
-
|
| 54 |
gr.ChatInterface(
|
| 55 |
output_inference,
|
| 56 |
chatbot=gr.Chatbot(height=300),
|
|
@@ -63,4 +62,4 @@ gr.ChatInterface(
|
|
| 63 |
retry_btn=None,
|
| 64 |
undo_btn="Delete Previous",
|
| 65 |
clear_btn="Clear",
|
| 66 |
-
).launch()
|
|
|
|
| 12 |
llm = Llama(
|
| 13 |
model_path=model_path,
|
| 14 |
n_ctx=2048, # Context length to use
|
| 15 |
+
n_threads=4, # Number of CPU threads to use
|
| 16 |
# n_gpu_layers=0 # Number of model layers to offload to GPU
|
| 17 |
# chat_format="chatml",
|
| 18 |
verbose=False
|
| 19 |
+
)
|
|
|
|
| 20 |
|
| 21 |
prompt_template="""<|begin_of_text|>Dibawah ini adalah percakapan antara dokter dengan pasiennya yang ingin berkonsultasi terkait kesehatan. Tuliskan jawaban yang tepat dan lengkap sesuai sesuai pertanyaan dari pasien.<|end_of_text|>
|
| 22 |
|
|
|
|
| 48 |
yield temp
|
| 49 |
|
| 50 |
history = ["init", prompt]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
gr.ChatInterface(
|
| 54 |
output_inference,
|
| 55 |
chatbot=gr.Chatbot(height=300),
|
|
|
|
| 62 |
retry_btn=None,
|
| 63 |
undo_btn="Delete Previous",
|
| 64 |
clear_btn="Clear",
|
| 65 |
+
).launch()
|