import gc

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

available_models = {
    "Llama 3.2": "unsloth/Llama-3.2-3B-Instruct",
    "Microsoft Phi-4 Mini": "microsoft/Phi-4-mini-instruct",
    # Note: this is a GGUF repo; AutoModelForCausalLM may need a regular (non-GGUF)
    # checkpoint or the gguf_file argument to load it.
    "Google Gemma 3": "unsloth/gemma-3-4b-it-GGUF",
}

# --- Global State (or use gr.State in Blocks) ---
# Keeps track of the currently loaded model/pipeline.
current_model_id = None
current_pipeline = None

print(f"Models available for selection: {list(available_models.keys())}")


# Define a function to load/switch models
def load_llm_model(model_name):
    """Loads the selected LLM, unloading the previous one."""
    global current_model_id, current_pipeline, tokenizer, model

    new_model_id = available_models.get(model_name)
    if not new_model_id:
        return "Invalid model selected.", None  # Error message and None pipeline

    if new_model_id == current_model_id and current_pipeline is not None:
        print(f"Model {model_name} is already loaded.")
        # Indicate success but don't reload
        return f"{model_name} already loaded.", current_pipeline

    print(f"Switching to model: {model_name} ({new_model_id})...")

    # Unload the previous model (important for GPU memory):
    # drop all references, run garbage collection, then clear the CUDA cache.
    current_pipeline = None
    for name in ("model", "tokenizer", "pipe"):
        if name in globals():
            del globals()[name]
    gc.collect()
    torch.cuda.empty_cache()
    print("Previous model unloaded (if any).")

    # --- Load the new model ---
    print(f"Loading {model_name}...")
    try:
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(new_model_id, trust_remote_code=True)

        # Load model, quantized to 4 bits to fit in limited GPU memory.
        # Recent transformers versions expect the quantization settings via
        # BitsAndBytesConfig rather than a bare load_in_4bit=True kwarg.
        model = AutoModelForCausalLM.from_pretrained(
            new_model_id,
            torch_dtype="auto",  # or torch.float16 / torch.bfloat16 if available
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            device_map="auto",
            trust_remote_code=True,
        )

        # Create pipeline
        loaded_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            torch_dtype="auto",
            device_map="auto",
        )

        print(f"Model {model_name} loaded successfully!")
        current_model_id = new_model_id
        current_pipeline = loaded_pipeline  # Update global state
        # Returning the pipeline as well makes it easy to wire up gr.State instead of globals.
        return f"{model_name} loaded successfully!", loaded_pipeline  # Status message and pipeline object

    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        current_model_id = None
        current_pipeline = None
        return f"Error loading {model_name}: {e}", None  # Error message and None pipeline


# --- Function to handle Q&A submission ---
# This function relies on the globally managed 'current_pipeline'.
# In a more robust Gradio app, you'd pass the pipeline via gr.State (see the sketch at the end).
def handle_submit(question):
    """Handles the user submitting a question."""
    if not current_pipeline:
        return "Error: No model is currently loaded. Please select a model."
    if not pdf_text:
        return "Error: PDF text is not loaded. Please run Section 4."
    if not question:
        return "Please enter a question."

    print(f"Handling submission for question: '{question}' using {current_model_id}")
    # Call the Q&A function defined in Section 5
    answer = answer_question_from_pdf(pdf_text, question, current_pipeline)
    return answer


# --- Build Gradio interface using Blocks ---
print("Building Gradio interface...")
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        f"""
        # PDF Q&A Bot Using Hugging Face Open-Source Models
        Ask questions about the document ('{pdf_filename}' if loaded, {len(pdf_text)} chars).
        Select an open-source LLM to answer your question.
        **Note:** Switching models takes time, as the new model needs to be downloaded and loaded onto the GPU.
        """
    )

    # Store the pipeline in Gradio state for better practice (optional for this simple version)
    # llm_pipeline_state = gr.State(None)

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(available_models.keys()),
            label="🤖 Select LLM Model",
            value=list(available_models.keys())[0],  # Default to the first model
        )
        status_textbox = gr.Textbox(label="Model Status", interactive=False)

    question_textbox = gr.Textbox(
        label="❓ Your Question",
        lines=2,
        placeholder="Enter your question about the document here...",
    )
    submit_button = gr.Button("Submit Question", variant="primary")
    answer_textbox = gr.Textbox(label="💡 Answer", lines=5, interactive=False)

    # --- Event handlers ---
    # When the dropdown changes, load the selected model.
    # load_llm_model returns (status, pipeline); with only the status Textbox as an
    # output, keep just the first value. With gr.State you would return both.
    model_dropdown.change(
        fn=lambda name: load_llm_model(name)[0],
        inputs=[model_dropdown],
        outputs=[status_textbox],
        # fn=load_llm_model, outputs=[status_textbox, llm_pipeline_state]  # If using gr.State
    )

    # When the button is clicked, call the submit handler
    submit_button.click(
        fn=handle_submit,
        inputs=[question_textbox],
        outputs=[answer_textbox],
        # inputs=[question_textbox, llm_pipeline_state],  # Pass state if using it
    )

# --- Initial model load ---
# Simpler here: manually load the first model *before* launching Gradio.
initial_model_name = list(available_models.keys())[0]
print(f"Performing initial load of default model: {initial_model_name}...")
status, _ = load_llm_model(initial_model_name)
status_textbox.value = status  # Set initial status shown in the UI
print("Initial load complete.")

# --- Launch the Gradio app ---
print("Launching Gradio demo...")
demo.launch(debug=True)  # debug=True provides more detailed logs
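

# --- Optional: passing the pipeline via gr.State instead of globals ---
# A minimal sketch (not wired into the demo above) of the gr.State approach hinted
# at in the comments: the loaded pipeline is threaded through per-session state
# rather than module-level globals. The helper names used here (build_state_demo,
# handle_submit_with_state) are illustrative, not part of the tutorial code; it
# still assumes pdf_text and answer_question_from_pdf from Sections 4 and 5.
def build_state_demo():
    """Builds an alternative Blocks app that keeps the pipeline in gr.State."""
    with gr.Blocks() as state_demo:
        llm_pipeline_state = gr.State(None)  # Holds the currently loaded pipeline

        model_dd = gr.Dropdown(
            choices=list(available_models.keys()),
            label="Select LLM Model",
            value=list(available_models.keys())[0],
        )
        status_tb = gr.Textbox(label="Model Status", interactive=False)
        question_tb = gr.Textbox(label="Your Question", lines=2)
        submit_btn = gr.Button("Submit Question")
        answer_tb = gr.Textbox(label="Answer", lines=5, interactive=False)

        # load_llm_model already returns (status_message, pipeline), which maps
        # directly onto [status_tb, llm_pipeline_state].
        model_dd.change(
            fn=load_llm_model,
            inputs=[model_dd],
            outputs=[status_tb, llm_pipeline_state],
        )

        def handle_submit_with_state(question, llm_pipeline):
            """Same Q&A logic as handle_submit, but reads the pipeline from state."""
            if llm_pipeline is None:
                return "Error: No model is currently loaded. Please select a model."
            if not question:
                return "Please enter a question."
            return answer_question_from_pdf(pdf_text, question, llm_pipeline)

        submit_btn.click(
            fn=handle_submit_with_state,
            inputs=[question_tb, llm_pipeline_state],
            outputs=[answer_tb],
        )
    return state_demo

# To try it, launch build_state_demo() instead of `demo` above.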