import os
from fastapi import FastAPI, Form, HTTPException
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from typing import Optional

app = FastAPI(title="Qwen 4B AI Chatbot")

# Set writable cache directory
os.environ["HF_HOME"] = "/tmp/huggingface_cache"

# ------------------ Load Qwen 4B Model ------------------
print("Loading Qwen 4B model...")
model_name = "Sameer-Handsome173/qwen_model_4B"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    trust_remote_code=True
)
print("Model loaded successfully!")
# ------------------ System Prompt ------------------
SYSTEM_PROMPT = """You are an intelligent AI chatbot assistant powered by Qwen 4B. You have access to various tools to help solve complex tasks.

Available Tools:
- **web_search**: Search the internet for current information and facts
- **calculator**: Perform mathematical calculations and computations
- **code_executor**: Write and execute code to solve problems
- **text_analyzer**: Analyze, summarize, and extract information from text
- **knowledge_base**: Access stored information and documents

When a user asks a question:
1. Analyze what they need
2. Decide which tool(s) would help
3. Use the appropriate tool(s) to gather information
4. Synthesize the results into a helpful response

For complex tasks, you can use multiple tools in sequence. Always explain your reasoning and show which tools you're using.

Example:
User: "What's 25% of 847 and what's the weather in Minsk?"
Your response:
Using CALCULATOR tool: 25% of 847 = 211.75
Using WEB_SEARCH tool: Searching current weather in Minsk...
Result: The answer is 211.75. The current weather in Minsk is...

Be helpful, clear, and show your thought process when using tools."""
# ------------------ Helper Function ------------------
def generate_response(user_query: str, max_tokens: int = 512, temperature: float = 0.7):
    """Generate a response with the Qwen model, prepending the system prompt."""
    try:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_query}
        ]

        # Format messages with the Qwen chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        inputs = tokenizer([text], return_tensors="pt").to(model.device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            repetition_penalty=1.1
        )
        # Decode only the newly generated tokens. With skip_special_tokens=True the
        # "<|im_start|>assistant" marker is stripped from the decoded text, so slicing
        # past the prompt length is the reliable way to isolate the assistant's reply.
        generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")
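# Quick local sanity check for the helper above (a sketch, kept as a comment so it
# does not run at import time; assumes the model loaded successfully):
#   print(generate_response("What's 25% of 847?", max_tokens=128))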
# ------------------ API Endpoints ------------------
@app.get("/")
def home():
    return {
        "message": "Qwen 4B AI Chatbot is running!",
        "model": "Sameer-Handsome173/qwen_model_4B",
        "description": "An intelligent chatbot that uses tools to solve complex tasks",
        "endpoint": "/chat"
    }
@app.post("/chat")
async def chat(
    query: str = Form(...),
    max_tokens: int = Form(512),
    temperature: float = Form(0.7)
):
    """
    Main chat endpoint - AI chatbot with tool-calling capabilities.

    The chatbot will:
    - Understand your question
    - Decide which tools to use
    - Call appropriate tools to solve the task
    - Provide a comprehensive answer

    Example queries:
    - "Calculate 15% of 2500 and explain compound interest"
    - "Search for the latest AI news and summarize the top 3 trends"
    - "Write Python code to sort a list and explain how it works"
    - "What's the population of Belarus and its GDP?"
    """
    try:
        if not query or len(query.strip()) == 0:
            raise HTTPException(status_code=400, detail="Query cannot be empty")

        response = generate_response(
            user_query=query,
            max_tokens=max_tokens,
            temperature=temperature
        )

        return {
            "query": query,
            "response": response,
            "model": "Qwen 4B",
            "status": "success"
        }
    except HTTPException:
        # Re-raise so FastAPI returns the intended 4xx/5xx status instead of a 200
        raise
    except Exception as e:
        return {
            "query": query,
            "error": str(e),
            "status": "failed"
        }
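# Example client call for /chat (a sketch, assuming the app is reachable on
# localhost:7860 as configured in the __main__ block below; the form fields
# match the /chat signature above):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       data={"query": "Calculate 15% of 2500", "max_tokens": 256, "temperature": 0.7},
#   )
#   print(resp.json()["response"])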
@app.get("/health")  # route path assumed; adjust if the Space exposes a different one
def health_check():
    return {
        "status": "healthy",
        "model_loaded": model is not None,
        "device": str(model.device) if model else "unknown"
    }
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)