# llmops1 / app.py
# Sameer-Handsome173's picture
# Update app.py
# b85067b verified
# raw
# history blame
# 5.15 kB
import os
from fastapi import FastAPI, Form, HTTPException
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from typing import Optional
app = FastAPI(title="🤖 Qwen 4B AI Chatbot")

# Writable cache location for Hugging Face downloads.
# The environment variable is still exported for any library or subprocess
# that reads it later, but it is NOT sufficient on its own here:
# huggingface_hub resolves HF_HOME once, when it is first imported, and
# `transformers` has already been imported at the top of this file. The cache
# directory is therefore also passed explicitly to `from_pretrained` below.
os.environ["HF_HOME"] = "/tmp/huggingface_cache"
HF_CACHE_DIR = os.path.join(os.environ["HF_HOME"], "hub")

# ------------------ Load Qwen 4B Model ------------------
# The tokenizer and model are loaded once at import time and shared by every
# request handler in this module.
print("🔄 Loading Qwen 4B model...")
model_name = "Sameer-Handsome173/qwen_model_4B"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=HF_CACHE_DIR,
    trust_remote_code=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # Half precision when a CUDA device is available, full precision otherwise.
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    cache_dir=HF_CACHE_DIR,
    trust_remote_code=True,
)
print("✅ Model loaded successfully!")
# ------------------ System Prompt ------------------
# System message prepended to every conversation sent to the model.
# The emoji below were previously stored as mis-decoded UTF-8 (mojibake);
# they are restored here so the model receives the intended characters.
SYSTEM_PROMPT = """You are an intelligent AI chatbot assistant powered by Qwen 4B. You have access to various tools to help solve complex tasks.
🛠️ Available Tools:
- **web_search**: Search the internet for current information and facts
- **calculator**: Perform mathematical calculations and computations
- **code_executor**: Write and execute code to solve problems
- **text_analyzer**: Analyze, summarize, and extract information from text
- **knowledge_base**: Access stored information and documents
When a user asks a question:
1. Analyze what they need
2. Decide which tool(s) would help
3. Use the appropriate tool(s) to gather information
4. Synthesize the results into a helpful response
For complex tasks, you can use multiple tools in sequence. Always explain your reasoning and show which tools you're using.
Example:
User: "What's 25% of 847 and what's the weather in Minsk?"
Your response:
🔧 Using CALCULATOR tool: 25% of 847 = 211.75
🔧 Using WEB_SEARCH tool: Searching current weather in Minsk...
Result: The answer is 211.75. The current weather in Minsk is...
Be helpful, clear, and show your thought process when using tools."""
# ------------------ Helper Function ------------------
def generate_response(user_query: str, max_tokens: int = 512, temperature: float = 0.7):
    """Generate the assistant's reply to ``user_query`` with the Qwen model.

    The query is wrapped in a two-message conversation (system prompt + user
    message), rendered through the tokenizer's chat template, and passed to
    ``model.generate``.

    Args:
        user_query: The user's message.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature. A value of 0 or below switches to
            greedy decoding, because sampling requires a strictly positive
            temperature.

    Returns:
        The generated reply text only, without the prompt, stripped of
        surrounding whitespace.

    Raises:
        HTTPException: With status 500 if any step of generation fails.
    """
    try:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_query},
        ]

        # Render the conversation with the model's chat template, ending with
        # the generation prompt so the model continues as the assistant.
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "repetition_penalty": 1.1,
        }
        if temperature > 0:
            generation_kwargs.update(
                do_sample=True,
                temperature=temperature,
                top_p=0.9,
            )
        else:
            # Sampling with a non-positive temperature is rejected by
            # `generate`; fall back to deterministic greedy decoding.
            generation_kwargs["do_sample"] = False

        outputs = model.generate(**inputs, **generation_kwargs)

        # `generate` returns prompt + continuation. Decoding the whole
        # sequence with skip_special_tokens=True removes the chat markers, so
        # the reply cannot be located by searching for "<|im_start|>assistant"
        # afterwards. Slice off the prompt tokens instead.
        prompt_length = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_length:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Generation failed: {str(e)}"
        ) from e
# ------------------ API Endpoints ------------------
@app.get("/")
def home():
    """Return a short description of the service and its chat endpoint."""
    return {
        "message": "✅ Qwen 4B AI Chatbot is running!",
        # Report the identifier that was actually loaded rather than a
        # duplicated literal that could drift out of sync.
        "model": model_name,
        "description": "An intelligent chatbot that uses tools to solve complex tasks",
        "endpoint": "/chat",
    }
@app.post("/chat")
async def chat(
    query: str = Form(...),
    max_tokens: int = Form(512),
    temperature: float = Form(0.7)
):
    """
    Main chat endpoint - AI chatbot with tool-calling capabilities

    The chatbot will:
    - Understand your question
    - Decide which tools to use
    - Call appropriate tools to solve the task
    - Provide a comprehensive answer

    Example queries:
    - "Calculate 15% of 2500 and explain compound interest"
    - "Search for the latest AI news and summarize the top 3 trends"
    - "Write Python code to sort a list and explain how it works"
    - "What's the population of Belarus and its GDP?"

    Form fields:
    - query: the user's message (required, must not be blank)
    - max_tokens: maximum number of tokens to generate (at least 1)
    - temperature: sampling temperature (must not be negative)

    Returns a JSON object with the query, the model response, the model
    label and a "success" status. Invalid input yields HTTP 400; a failure
    inside generation yields HTTP 500.
    """
    # Validate before entering the try block so the 400 responses reach the
    # client instead of being converted into a 200 "failed" payload.
    if not query or not query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty")
    if max_tokens < 1:
        raise HTTPException(status_code=400, detail="max_tokens must be at least 1")
    if temperature < 0:
        raise HTTPException(status_code=400, detail="temperature must not be negative")

    try:
        response = generate_response(
            user_query=query,
            max_tokens=max_tokens,
            temperature=temperature
        )
    except HTTPException:
        # Preserve the status code chosen by the helper (500 on failure).
        raise
    except Exception as e:
        # Any other unexpected error keeps the original best-effort payload.
        return {
            "query": query,
            "error": str(e),
            "status": "failed"
        }

    return {
        "query": query,
        "response": response,
        "model": "Qwen 4B",
        "status": "success"
    }
@app.get("/health")
def health_check():
    """Report liveness along with whether the model is loaded and on which device."""
    is_loaded = model is not None
    if model:
        device_label = str(model.device)
    else:
        device_label = "unknown"
    return {"status": "healthy", "model_loaded": is_loaded, "device": device_label}
# Script entry point: serve the app with uvicorn on all interfaces, port 7860.
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)