from fastapi import FastAPI, Query, HTTPException
import os

# Hide all GPUs *before* torch is imported so it initializes in CPU-only mode.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import torch
import re
import time
import logging
import json
from transformers import AutoTokenizer, GenerationConfig
from peft import AutoPeftModelForCausalLM
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
# -------- OPTIMIZATION SETTINGS --------
# Cap thread counts so inference doesn't oversubscribe a small CPU container.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["MKL_NUM_THREADS"] = "2"
torch.set_num_threads(2)
torch.set_num_interop_threads(1)
# -------- LOGGING --------
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("news-filter")
# -------- MODEL --------
model_name = "habulaj/filterinstruct180"

log.info("🚀 Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
FastLanguageModel.for_inference(model)  # takes only the model; there is no `cpu` kwarg
model.eval()
log.info("✅ Model loaded (eval mode).")
generation_config = GenerationConfig(
    max_new_tokens=128,
    do_sample=False,  # greedy decoding; temperature is ignored when sampling is off
    num_beams=1,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    length_penalty=1.0,
)
# -------- FASTAPI --------
app = FastAPI(title="News Filter JSON API")

@app.get("/")
def read_root():
    return {"message": "News Filter JSON API is running!", "docs": "/docs"}
@app.get("/filter")
def get_filter(
title: str = Query(..., description="News title"),
content: str = Query(..., description="News content")
):
try:
result = infer_filter(title, content)
try:
return {"result": json.loads(result)}
except json.JSONDecodeError:
return {"result": result, "warning": "Returned as string due to JSON parsing error"}
except HTTPException as he:
raise he
except Exception as e:
log.exception("❌ Erro inesperado:")
raise HTTPException(status_code=500, detail="Internal server error during inference.")
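
# Example request (hypothetical local host/port; adjust to your deployment):
#   curl "http://localhost:7860/filter?title=Example%20Title&content=Some%20article%20text"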
@app.on_event("startup")
async def warmup():
    # Run one inference at startup so the first real request doesn't pay the
    # cold-start cost (weight paging, cache allocation, etc.).
    log.info("🔥 Running warmup...")
    try:
        infer_filter("Test title", "Test content")
        log.info("✅ Warmup complete.")
    except Exception as e:
        log.warning(f"⚠️ Warmup failed: {e}")
# -------- INFERENCE --------
def infer_filter(title, content):
    # Few-shot prompt: two worked examples pin down the exact JSON schema,
    # then the real article is appended as the final user turn.
    messages = [
        {
            "role": "user",
            "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "New 'Star Wars' Movie Announced"
Content: "Lucasfilm confirmed a new Star Wars movie set to release in 2026, directed by a rising filmmaker."
""",
        },
        {
            "role": "assistant",
            "content": '{ "death_related": false, "relevance": "high", "global_interest": true, "entity_type": "movie", "entity_name": "Star Wars", "breaking_news": true, "has_video_content": false }',
        },
        {
            "role": "user",
            "content": """Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "Legendary Musician Carlos Mendes Dies at 78"
Content: "Carlos Mendes, the internationally acclaimed Brazilian guitarist and composer known for blending traditional bossa nova with modern jazz, has died at the age of 78."
""",
        },
        {
            "role": "assistant",
            "content": '{ "death_related": true, "relevance": "high", "global_interest": true, "entity_type": "person", "entity_name": "Carlos Mendes", "breaking_news": true, "has_video_content": false }',
        },
        {
            "role": "user",
            "content": f"""Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.
Title: "{title}"
Content: "{content}"
""",
        },
    ]
log.info(f"🧠 Inferência iniciada para: {title}")
start_time = time.time()
inputs = tokenizer.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_tensors="pt",
).to("cpu")
with torch.no_grad(), torch.inference_mode():
outputs = model.generate(
input_ids=inputs,
generation_config=generation_config,
)
prompt_text = tokenizer.decode(inputs[0], skip_special_tokens=True)
full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
generated = full_output[len(prompt_text):].strip()
json_str = extract_json(generated)
duration = time.time() - start_time
log.info(f"✅ JSON extraído em {duration:.2f}s")
return json_str
def extract_json(text):
    # Non-greedy match returns the first {...} span; adequate for the flat,
    # single-level JSON this model emits, but it would truncate nested objects.
    match = re.search(r'\{.*?\}', text, flags=re.DOTALL)
    if match:
        return match.group(0)
    return text
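
# Minimal local entry point, assuming uvicorn is installed. Hugging Face Spaces
# conventionally serves on port 7860; adjust host/port for other deployments.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)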