"""Run a single medical Q&A query against a local llama.cpp model and print the answer."""

from llama_cpp import Llama

# Insert your medical query here
MEDICAL_QUERY = """ """

# Marker that ends the model's answer (e.g. "</s>"). Leave empty to keep the
# full generated text untrimmed.
STOP_MARKER = ""

model_path = "./"  # Path to the directory containing your model weight files

llm = Llama(
    model_path=model_path,
    n_gpu_layers=40,  # layers offloaded to the GPU
    n_ctx=10000,      # context window size, in tokens
    n_threads=4,
)

medical_query = MEDICAL_QUERY.strip()
prompt = f"USER: {medical_query}\nASSISTANT:"

# NOTE(review): prompt tokens + max_tokens should fit within n_ctx, but
# max_tokens=12000 exceeds the 10000-token context — confirm intended limits.
output = llm(
    prompt,
    max_tokens=12000,
    temperature=0.3,
    top_p=0.7,            # nucleus-sampling cutoff
    repeat_penalty=1.05,
)

# The llama.cpp completion dict carries the text at choices[0]["text"];
# fall back to "" if the structure is missing.
result = output.get("choices", [{}])[0].get("text", "").strip()

# BUG FIX: the original tested `if "" in result:` — an empty marker is a
# substring of every string, find("") returns 0 and len("") is 0, so the
# whole answer was truncated to "" and nothing was printed. Only trim when
# a NON-EMPTY stop marker actually appears in the output.
if STOP_MARKER and STOP_MARKER in result:
    end_pos = result.find(STOP_MARKER) + len(STOP_MARKER)
    result = result[:end_pos]

print(result)