from fastapi import FastAPI, Query, HTTPException
import os

# Hide all GPUs *before* torch is imported so it initializes in CPU-only mode.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import torch
import re
import time
import logging
import json
from transformers import AutoTokenizer, GenerationConfig
from peft import AutoPeftModelForCausalLM
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
# -------- OPTIMIZATION SETTINGS --------
# Cap thread counts so inference doesn't oversubscribe a small CPU container.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["MKL_NUM_THREADS"] = "2"
torch.set_num_threads(2)
torch.set_num_interop_threads(1)
# -------- LOGGING --------
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("news-filter")
# -------- MODEL --------
model_name = "habulaj/filterinstruct180"

log.info("🚀 Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
FastLanguageModel.for_inference(model)  # takes only the model; there is no `cpu` kwarg
model.eval()
log.info("✅ Model loaded (eval mode).")
generation_config = GenerationConfig(
    max_new_tokens=128,
    do_sample=False,  # greedy decoding; temperature is ignored when sampling is off
    num_beams=1,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    length_penalty=1.0,
)
# -------- FASTAPI --------
app = FastAPI(title="News Filter JSON API")

@app.get("/")
def read_root():
    return {"message": "News Filter JSON API is running!", "docs": "/docs"}
@app.get("/filter")
def get_filter(
title: str = Query(..., description="News title"),
content: str = Query(..., description="News content")
):
try:
result = infer_filter(title, content)
try:
return {"result": json.loads(result)}
except json.JSONDecodeError:
return {"result": result, "warning": "Returned as string due to JSON parsing error"}
except HTTPException as he:
raise he
except Exception as e:
log.exception("❌ Erro inesperado:")
raise HTTPException(status_code=500, detail="Internal server error during inference.")
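
# Example request (hypothetical local host/port; adjust to your deployment):
#   curl "http://localhost:7860/filter?title=Example%20Title&content=Some%20article%20text"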
@app.on_event("startup")
async def warmup():
    # Run one inference at startup so the first real request doesn't pay the
    # cold-start cost (weight paging, cache allocation, etc.).
    log.info("🔥 Running warmup...")
    try:
        infer_filter("Test title", "Test content")
        log.info("✅ Warmup complete.")
    except Exception as e:
        log.warning(f"⚠️ Warmup failed: {e}")
# -------- INFERENCE --------
def infer_filter(title, content):
    # Few-shot prompt: two worked examples pin down the exact JSON schema,
    # then the real article is appended as the final user turn.
    messages = [
        {
            "role": "user",
            "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "New 'Star Wars' Movie Announced"
Content: "Lucasfilm confirmed a new Star Wars movie set to release in 2026, directed by a rising filmmaker."
""",
        },
        {
            "role": "assistant",
            "content": '{ "death_related": false, "relevance": "high", "global_interest": true, "entity_type": "movie", "entity_name": "Star Wars", "breaking_news": true, "has_video_content": false }',
        },
        {
            "role": "user",
            "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "Legendary Musician Carlos Mendes Dies at 78"
Content: "Carlos Mendes, the internationally acclaimed Brazilian guitarist and composer known for blending traditional bossa nova with modern jazz, has died at the age of 78."
""",
        },
        {
            "role": "assistant",
            "content": '{ "death_related": true, "relevance": "high", "global_interest": true, "entity_type": "person", "entity_name": "Carlos Mendes", "breaking_news": true, "has_video_content": false }',
        },
        {
            "role": "user",
            "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "{title}"
Content: "{content}"
""",
        },
    ]
log.info(f"🧠 Inferência iniciada para: {title}")
start_time = time.time()
inputs = tokenizer.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_tensors="pt",
).to("cpu")
with torch.no_grad(), torch.inference_mode():
outputs = model.generate(
input_ids=inputs,
generation_config=generation_config,
)
prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=True)
full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
generated = full_output[len(prompt_text):].strip()
json_str = extract_json(generated)
duration = time.time() - start_time
log.info(f"✅ JSON extraído em {duration:.2f}s")
return json_str
def extract_json(text):
    # Non-greedy match returns the first {...} span; adequate for the flat,
    # single-level JSON this model emits, but it would truncate nested objects.
    match = re.search(r'\{.*?\}', text, flags=re.DOTALL)
    if match:
        return match.group(0)
    return text
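
# Minimal local entry point, assuming uvicorn is installed. Hugging Face Spaces
# conventionally serves on port 7860; adjust host/port for other deployments.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)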