"""Run a single medical Q&A query against a local llama.cpp model and print the answer."""

from llama_cpp import Llama

# Insert your medical query here
MEDICAL_QUERY = """ """

# Marker that ends the model's answer (e.g. "</s>"). Leave empty to keep the
# full generated text untrimmed.
STOP_MARKER = ""

model_path = "./"  # Path to the directory containing your model weight files

llm = Llama(
    model_path=model_path,
    n_gpu_layers=40,  # layers offloaded to the GPU
    n_ctx=10000,      # context window size, in tokens
    n_threads=4,
)

medical_query = MEDICAL_QUERY.strip()
prompt = f"USER: {medical_query}\nASSISTANT:"

# NOTE(review): prompt tokens + max_tokens should fit within n_ctx, but
# max_tokens=12000 exceeds the 10000-token context — confirm intended limits.
output = llm(
    prompt,
    max_tokens=12000,
    temperature=0.3,
    top_p=0.7,            # nucleus-sampling cutoff
    repeat_penalty=1.05,
)

# The llama.cpp completion dict carries the text at choices[0]["text"];
# fall back to "" if the structure is missing.
result = output.get("choices", [{}])[0].get("text", "").strip()

# BUG FIX: the original tested `if "" in result:` — an empty marker is a
# substring of every string, find("") returns 0 and len("") is 0, so the
# whole answer was truncated to "" and nothing was printed. Only trim when
# a NON-EMPTY stop marker actually appears in the output.
if STOP_MARKER and STOP_MARKER in result:
    end_pos = result.find(STOP_MARKER) + len(STOP_MARKER)
    result = result[:end_pos]

print(result)