import os
from fastapi import FastAPI, Form, HTTPException
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from typing import Optional

app = FastAPI(title="🤖 Qwen 4B AI Chatbot")

# ✅ Set writable cache directory
# NOTE: the Hugging Face libraries resolve HF_HOME when they are first
# imported, and `transformers` is already imported at the top of this file.
# Setting the variable here therefore does not redirect downloads made by this
# process on its own; it is kept for subprocesses and any late readers, and the
# loaders below additionally receive an explicit `cache_dir`.
# NOTE(review): code fetched because of `trust_remote_code=True` may still be
# cached under the import-time HF_HOME — export HF_HOME in the environment
# (before Python starts) if that location is not writable.
HF_CACHE_DIR = "/tmp/huggingface_cache"
os.environ["HF_HOME"] = HF_CACHE_DIR
# Same layout the libraries would use if HF_HOME had been set before import.
HF_HUB_CACHE_DIR = os.path.join(HF_CACHE_DIR, "hub")

# ------------------ Load Qwen 4B Model ------------------
print("🔄 Loading Qwen 4B model...")
model_name = "Sameer-Handsome173/qwen_model_4B"

# SECURITY NOTE: trust_remote_code=True executes Python code shipped in the
# model repository; only keep it enabled for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=HF_HUB_CACHE_DIR,
    trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=HF_HUB_CACHE_DIR,
    # Half precision only when a CUDA device is available; full precision otherwise.
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    trust_remote_code=True
)

print("✅ Model loaded successfully!")

# ------------------ System Prompt ------------------
# Sent as the "system" message of every conversation built in
# generate_response().
# NOTE(review): the prompt advertises tools (web_search, calculator, ...), but
# no tool implementation or dispatch is visible in this file — any "tool usage"
# in a reply appears to be text produced by the model itself. Confirm whether
# real tools are wired up elsewhere.
SYSTEM_PROMPT = """You are an intelligent AI chatbot assistant powered by Qwen 4B. You have access to various tools to help solve complex tasks.

🛠️ Available Tools:
- **web_search**: Search the internet for current information and facts
- **calculator**: Perform mathematical calculations and computations
- **code_executor**: Write and execute code to solve problems
- **text_analyzer**: Analyze, summarize, and extract information from text
- **knowledge_base**: Access stored information and documents

When a user asks a question:
1. Analyze what they need
2. Decide which tool(s) would help
3. Use the appropriate tool(s) to gather information
4. Synthesize the results into a helpful response

For complex tasks, you can use multiple tools in sequence. Always explain your reasoning and show which tools you're using.

Example:
User: "What's 25% of 847 and what's the weather in Minsk?"
Your response:
🔧 Using CALCULATOR tool: 25% of 847 = 211.75
🔧 Using WEB_SEARCH tool: Searching current weather in Minsk...
Result: The answer is 211.75. The current weather in Minsk is...

Be helpful, clear, and show your thought process when using tools."""

# ------------------ Helper Function ------------------ 
def generate_response(user_query: str, max_tokens: int = 512, temperature: float = 0.7):
    """Generate the assistant's reply to ``user_query`` with the loaded model.

    The query is combined with ``SYSTEM_PROMPT`` through the tokenizer's chat
    template. Only the tokens produced after the prompt are decoded, so the
    returned text does not echo the system prompt or the user's message.

    Args:
        user_query: The user's message.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature. Values ``<= 0`` select greedy
            decoding instead of sampling.

    Returns:
        The decoded reply text, stripped of surrounding whitespace.

    Raises:
        HTTPException: With status 500 if templating, tokenization, generation
            or decoding fails; the original exception is chained as the cause.
    """
    try:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_query}
        ]

        # Format messages for the chat template, ending with the assistant turn
        # opener so the model continues as the assistant.
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "repetition_penalty": 1.1,
        }
        if temperature > 0:
            generation_kwargs.update(
                do_sample=True,
                temperature=temperature,
                top_p=0.9,
            )
        else:
            # Sampling requires a strictly positive temperature; fall back to
            # greedy decoding rather than failing the request.
            generation_kwargs["do_sample"] = False

        outputs = model.generate(**inputs, **generation_kwargs)

        # For a causal LM the returned sequence starts with the prompt tokens.
        # Drop them by position: special tokens are stripped during decoding,
        # so searching the decoded text for a chat marker is not reliable.
        prompt_length = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_length:]

        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}") from e

# ------------------ API Endpoints ------------------ 

@app.get("/")
def home():
    """Return a short status payload naming the model and the chat endpoint."""
    # Keyword order is preserved, so the JSON keys keep their original order.
    return dict(
        message="✅ Qwen 4B AI Chatbot is running!",
        model="Sameer-Handsome173/qwen_model_4B",
        description="An intelligent chatbot that uses tools to solve complex tasks",
        endpoint="/chat",
    )

@app.post("/chat")
async def chat(
    query: str = Form(...),
    max_tokens: int = Form(512),
    temperature: float = Form(0.7)
):
    """
    Main chat endpoint - answer a query with the Qwen chatbot.

    The model is prompted (through the system prompt) to:
    - Understand your question
    - Decide which tools to use
    - Describe the tools it would use to solve the task
    - Provide a comprehensive answer

    Example queries:
    - "Calculate 15% of 2500 and explain compound interest"
    - "Search for the latest AI news and summarize the top 3 trends"
    - "Write Python code to sort a list and explain how it works"
    - "What's the population of Belarus and its GDP?"

    Form fields:
    - query: the user's message (required, must not be blank)
    - max_tokens: maximum number of tokens to generate (default 512)
    - temperature: sampling temperature (default 0.7)

    Returns a JSON object with `query`, `response`, `model` and `status`.

    Errors are reported through the HTTP status code: 400 when the query is
    blank, 500 when generation fails.
    """
    # Validate up front so the 400 reaches the client with its status code
    # instead of being folded into a 200 response.
    if not query or not query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    # generate_response() reports failures as HTTPException(500); let it
    # propagate so FastAPI returns the proper status code.
    # NOTE(review): this is a synchronous call inside an `async def` handler,
    # so the event loop is occupied while the model generates — confirm that
    # serialising requests this way is intended.
    response = generate_response(
        user_query=query,
        max_tokens=max_tokens,
        temperature=temperature
    )

    return {
        "query": query,
        "response": response,
        "model": "Qwen 4B",
        "status": "success"
    }

@app.get("/health")
def health_check():
    """Report service liveness, whether a model object exists, and its device."""
    # Default used when the model object is falsy.
    device_name = "unknown"
    if model:
        device_name = str(model.device)

    report = {"status": "healthy"}
    report["model_loaded"] = model is not None
    report["device"] = device_name
    return report

if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, port=7860, host="0.0.0.0")