|
|
import os

# These must be set before torch/transformers are imported: hide all GPUs
# (CPU-only inference) and cap tokenizer/BLAS/OpenMP parallelism.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["MKL_NUM_THREADS"] = "2"

import json
import logging
import re
import time

# Unsloth patches transformers/peft at import time, so import it first.
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template

import torch
from fastapi import FastAPI, HTTPException, Query
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig

# Cap PyTorch's own thread pools for predictable CPU usage.
torch.set_num_threads(2)
torch.set_num_interop_threads(1)

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("news-filter")

model_name = "habulaj/filterinstruct180"
log.info("🚀 Loading model and tokenizer...")

# Fast tokenizer with the Llama 3.1 chat template applied.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

# Llama-family tokenizers ship without a pad token; reuse EOS for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the LoRA adapter together with its base weights, entirely on CPU.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
# for_inference() takes only the model; it does not accept a `cpu` kwarg.
FastLanguageModel.for_inference(model)
model.eval()
log.info("✅ Model loaded (eval mode).")

# Greedy decoding: with do_sample=False the temperature setting is ignored
# (and triggers a transformers warning), and length_penalty only affects
# beam search, so both are dropped.
generation_config = GenerationConfig(
    max_new_tokens=128,
    do_sample=False,
    num_beams=1,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
)


app = FastAPI(title="News Filter JSON API")


@app.get("/")
def read_root():
    return {"message": "News Filter JSON API is running!", "docs": "/docs"}
@app.get("/filter") |
|
|
def get_filter( |
|
|
title: str = Query(..., description="News title"), |
|
|
content: str = Query(..., description="News content") |
|
|
): |
|
|
try: |
|
|
result = infer_filter(title, content) |
|
|
try: |
|
|
return {"result": json.loads(result)} |
|
|
except json.JSONDecodeError: |
|
|
return {"result": result, "warning": "Returned as string due to JSON parsing error"} |
|
|
except HTTPException as he: |
|
|
raise he |
|
|
except Exception as e: |
|
|
log.exception("❌ Erro inesperado:") |
|
|
raise HTTPException(status_code=500, detail="Internal server error during inference.") |
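
# Example request (a sketch; assumes the service is reachable at
# localhost:8000 — adjust host/port to your deployment):
#   curl -G "http://localhost:8000/filter" \
#     --data-urlencode "title=New 'Star Wars' Movie Announced" \
#     --data-urlencode "content=Lucasfilm confirmed a new Star Wars movie."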
|
|
|
|
|
# Note: @app.on_event is deprecated in recent FastAPI releases in favor of
# lifespan handlers, but it still works here.
@app.on_event("startup")
async def warmup():
    log.info("🔥 Running warmup...")
    try:
        # One dummy inference pays the first-call overhead up front.
        infer_filter("Test title", "Test content")
        log.info("✅ Warmup complete.")
    except Exception as e:
        log.warning(f"⚠️ Warmup failed: {e}")


def infer_filter(title, content):
    # Two few-shot turns pin down the exact JSON schema; the real article
    # is then appended as the final user message.
    messages = [
        {
            "role": "user",
            "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.

Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.

Title: "New 'Star Wars' Movie Announced"
Content: "Lucasfilm confirmed a new Star Wars movie set to release in 2026, directed by a rising filmmaker."
""",
        },
        {
            "role": "assistant",
            "content": '{ "death_related": false, "relevance": "high", "global_interest": true, "entity_type": "movie", "entity_name": "Star Wars", "breaking_news": true, "has_video_content": false }',
        },
        {
            "role": "user",
            "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.

Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.

Title: "Legendary Musician Carlos Mendes Dies at 78"
Content: "Carlos Mendes, the internationally acclaimed Brazilian guitarist and composer known for blending traditional bossa nova with modern jazz, has died at the age of 78."
""",
        },
        {
            "role": "assistant",
            "content": '{ "death_related": true, "relevance": "high", "global_interest": true, "entity_type": "person", "entity_name": "Carlos Mendes", "breaking_news": true, "has_video_content": false }',
        },
        {
            "role": "user",
            "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.

Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.

Title: "{title}"
Content: "{content}"
""",
        },
    ]
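
    # Output schema (as established by the few-shot examples above):
    #   death_related (bool), relevance (str), global_interest (bool),
    #   entity_type (str), entity_name (str), breaking_news (bool),
    #   has_video_content (bool)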
|
|
|
|
|
    log.info(f"🧠 Starting inference for: {title}")
    start_time = time.time()

    # Render the chat, append the assistant generation prompt, and tokenize.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cpu")

    # torch.inference_mode() alone is enough; stacking no_grad() on top of
    # it is redundant.
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=inputs,
            generation_config=generation_config,
        )

    # Decode only the newly generated tokens. Slicing by token count is more
    # robust than trimming the decoded prompt string, whose length can differ
    # once special tokens are skipped.
    generated = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()

    json_str = extract_json(generated)
    duration = time.time() - start_time
    log.info(f"✅ JSON extracted in {duration:.2f}s")
    return json_str
|

def extract_json(text):
    # Non-greedy match returns the first {...} span. This is fine for the
    # flat, single-level JSON this model emits, but would truncate nested
    # objects.
    match = re.search(r'\{.*?\}', text, flags=re.DOTALL)
    if match:
        return match.group(0)
    return text
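

# Minimal local entry point (a sketch; assumes uvicorn, the usual ASGI
# server for FastAPI, is installed — host/port are illustrative defaults):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)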