"""FastAPI service exposing a Qwen 4B chat model over HTTP."""
import os

# ✅ Set writable cache directory
# This must run BEFORE `transformers` is imported: huggingface_hub resolves
# HF_HOME into its cache-path constants at import time, so assigning it after
# the import has no effect on where model files are stored.
os.environ["HF_HOME"] = "/tmp/huggingface_cache"

from fastapi import FastAPI, Form, HTTPException  # noqa: E402
from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: E402
import torch  # noqa: E402
from typing import Optional  # noqa: E402

app = FastAPI(title="🤖 Qwen 4B AI Chatbot")

# ------------------ Load Qwen 4B Model ------------------
print("🔄 Loading Qwen 4B model...")
model_name = "Sameer-Handsome173/qwen_model_4B"

# NOTE(security): trust_remote_code=True executes Python code shipped in the
# model repository. Only keep it enabled for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # Half precision on GPU; full precision when running on CPU.
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    trust_remote_code=True
)
print("✅ Model loaded successfully!")

# ------------------ System Prompt ------------------
# NOTE(review): none of the tools named below are implemented in the visible
# code, so the model can only describe tool usage, not perform it — confirm
# whether real tool execution is expected.
SYSTEM_PROMPT = """You are an intelligent AI chatbot assistant powered by Qwen 4B. You have access to various tools to help solve complex tasks.

🛠️ Available Tools:
- **web_search**: Search the internet for current information and facts
- **calculator**: Perform mathematical calculations and computations
- **code_executor**: Write and execute code to solve problems
- **text_analyzer**: Analyze, summarize, and extract information from text
- **knowledge_base**: Access stored information and documents

When a user asks a question:
1. Analyze what they need
2. Decide which tool(s) would help
3. Use the appropriate tool(s) to gather information
4. Synthesize the results into a helpful response

For complex tasks, you can use multiple tools in sequence. Always explain your reasoning and show which tools you're using.

Example:
User: "What's 25% of 847 and what's the weather in Minsk?"
Your response:
🔧 Using CALCULATOR tool: 25% of 847 = 211.75
🔧 Using WEB_SEARCH tool: Searching current weather in Minsk...
Result: The answer is 211.75. The current weather in Minsk is...

Be helpful, clear, and show your thought process when using tools."""


# ------------------ Helper Function ------------------
def generate_response(user_query: str, max_tokens: int = 512, temperature: float = 0.7) -> str:
    """Generate response using Qwen model with system prompt.

    Args:
        user_query: The user's message, sent as the single user turn.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature. Values <= 0 switch to greedy
            decoding, because sampling requires a strictly positive value.

    Returns:
        The assistant's reply only (prompt text is not included).

    Raises:
        HTTPException: With status 500 if templating, tokenization,
            generation or decoding fails.
    """
    try:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_query}
        ]

        # Format messages for Qwen chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "repetition_penalty": 1.1,
        }
        if temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
        else:
            generation_kwargs["do_sample"] = False

        outputs = model.generate(**inputs, **generation_kwargs)

        # The output sequence starts with the prompt tokens. Drop them before
        # decoding: with skip_special_tokens=True the "<|im_start|>" markers
        # are removed, so the reply cannot be located by splitting the text.
        prompt_length = inputs["input_ids"].shape[-1]
        completion_ids = outputs[0][prompt_length:]
        return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}") from e


# ------------------ API Endpoints ------------------
@app.get("/")
def home():
    """Return a short service description and the chat endpoint path."""
    return {
        "message": "✅ Qwen 4B AI Chatbot is running!",
        "model": model_name,
        "description": "An intelligent chatbot that uses tools to solve complex tasks",
        "endpoint": "/chat"
    }


@app.post("/chat")
async def chat(
    query: str = Form(...),
    max_tokens: int = Form(512),
    temperature: float = Form(0.7)
):
    """
    Main chat endpoint - AI chatbot with tool-calling capabilities

    The chatbot will:
    - Understand your question
    - Decide which tools to use
    - Call appropriate tools to solve the task
    - Provide a comprehensive answer

    Example queries:
    - "Calculate 15% of 2500 and explain compound interest"
    - "Search for the latest AI news and summarize the top 3 trends"
    - "Write Python code to sort a list and explain how it works"
    - "What's the population of Belarus and its GDP?"
    """
    # Validation errors are raised as HTTPException and deliberately NOT
    # caught here, so clients receive the real 4xx/5xx status instead of a
    # 200 response carrying an error payload.
    if not query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty")
    if max_tokens < 1:
        raise HTTPException(status_code=400, detail="max_tokens must be at least 1")
    if temperature < 0:
        raise HTTPException(status_code=400, detail="temperature must not be negative")

    # NOTE(review): generate_response is a blocking call inside an async
    # endpoint, so the event loop (including /health) stalls while the model
    # generates. Moving it to a worker thread would allow concurrent
    # generations on the same model — decide whether that is acceptable.
    response = generate_response(
        user_query=query,
        max_tokens=max_tokens,
        temperature=temperature
    )
    return {
        "query": query,
        "response": response,
        "model": "Qwen 4B",
        "status": "success"
    }


@app.get("/health")
def health_check():
    """Report whether the model object exists and which device it is on."""
    return {
        "status": "healthy",
        "model_loaded": model is not None,
        "device": str(model.device) if model else "unknown"
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)