from __future__ import annotations

import json
import re
from typing import List, Dict, Any

from .config import OpenRouterConfig
from .llm import build_openrouter_client, chat_complete
from .models import normalize_result


def _dedupe_by_url(articles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop articles whose non-empty URL was already seen, keeping first occurrences.

    Articles with a missing or blank URL are always kept, because there is
    nothing to compare them by. ``None`` is treated as an empty list.
    """
    unique: List[Dict[str, Any]] = []
    seen_urls = set()
    for article in articles or []:
        url = (article.get('url') or '').strip()
        if url:
            if url in seen_urls:
                continue
            seen_urls.add(url)
        unique.append(article)
    return unique


def _heuristic_result(claim: str, articles: List[Dict[str, Any]], reasoning: str) -> Dict[str, Any]:
    """Score *claim* against *articles* by keyword overlap, without an LLM.

    Keywords are the whitespace-separated, lower-cased words of the claim that
    are longer than four characters. Each article contributes the number of
    keywords found (as substrings) in its lower-cased content. Confidence is
    ``0.1 * total_overlap`` capped at 0.6, or 0.05 when no article matched.

    Args:
        claim: The claim text being checked.
        articles: Article dicts; only the ``content`` and ``url`` keys are read.
        reasoning: Text stored under the ``reasoning`` key of the result.

    Returns:
        A dict with the keys ``verdict``, ``confidence``, ``reasoning``,
        ``supporting_evidence``, ``contradicting_evidence`` and
        ``context_quality``.
    """
    keywords = {word for word in claim.lower().split() if len(word) > 4}
    supporting: List[str] = []
    score = 0
    for article in articles:
        # ``or ''`` guards against an explicit ``content: None``.
        text = (article.get('content', '') or '').lower()
        overlap = sum(1 for keyword in keywords if keyword in text)
        if overlap:
            supporting.append(f"Match ({overlap}) in {article.get('url','')}")
            score += overlap
    confidence = min(0.6, 0.1 * score) if supporting else 0.05
    verdict = "POSSIBLY TRUE" if confidence > 0.3 else "UNVERIFIED"
    return {
        "verdict": verdict,
        "confidence": confidence,
        "reasoning": reasoning,
        "supporting_evidence": supporting[:5],
        "contradicting_evidence": [],
        "context_quality": "medium" if supporting else "low",
    }


def _format_context(articles: List[Dict[str, Any]]) -> str:
    """Render *articles* as numbered text sections for inclusion in the LLM prompt.

    Only the first 500 characters of each article's content are included.
    """
    sections = []
    for index, article in enumerate(articles):
        # A present-but-None content must not break slicing.
        snippet = (article.get('content') or '')[:500]
        sections.append(
            f"Article {index+1}:\n"
            f"Title: {article.get('title','Unknown')}\n"
            f"Source: {article.get('source','Unknown')}\n"
            f"URL: {article.get('url','')}\n"
            f"Content: {snippet}..."
        )
    return "\n\n".join(sections)


def _parse_json_object(raw: str) -> Dict[str, Any]:
    """Extract and decode a JSON object from raw LLM output.

    A leading Markdown code fence (and its first line, e.g. a language tag) is
    stripped, then the span from the first ``{`` to the last ``}`` is decoded.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass) or decodes to something other than an
            object.
    """
    content = raw.strip()
    if content.startswith("```"):
        content = content.strip("`")
        if "\n" in content:
            content = "\n".join(content.split("\n")[1:])
    match = re.search(r"\{[\s\S]*\}", content)
    if match:
        content = match.group(0)
    data = json.loads(content)
    if not isinstance(data, dict):
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")
    return data


class QueryRewriter:
    """Expand a user query into several search queries, using an LLM when available."""

    def __init__(self, cfg: OpenRouterConfig):
        self.cfg = cfg
        # May be falsy; rewrite() then uses a fixed, LLM-free expansion.
        self.client = build_openrouter_client(cfg)

    def rewrite(self, query: str) -> List[str]:
        """Return up to three search queries derived from *query*.

        Without a client, the result is the query itself plus " Pakistan" and
        " Urdu" suffixed variants, in that fixed order. With a client, the LLM
        is asked for three queries, one per line. If the call fails or yields
        no non-empty lines, ``[query]`` is returned.
        """
        if not self.client:
            # The three variants are always distinct, so a plain list keeps
            # the order deterministic (a set would not).
            return [query, f"{query} Pakistan", f"{query} Urdu"]

        prompt = (
            "Generate 3 diverse search queries for Pakistani news related to the user's query. "
            "Consider Urdu-English variations and synonyms. Return only the queries, one per line without numbering.\n\n"
            f"User query: {query}"
        )
        try:
            out = chat_complete(
                self.client,
                self.cfg.model,
                prompt,
                temperature=self.cfg.temperature,
                max_tokens=min(400, self.cfg.max_tokens),
            )
            # Strip list bullets/dashes the model may add despite the instructions.
            lines = [ln.strip(" -•\t").strip() for ln in out.splitlines()]
            queries = [ln for ln in lines if ln]
            return queries[:3] if queries else [query]
        except Exception as e:
            # Best-effort: any failure degrades to the unmodified query.
            print(f"❌ Query rewriting error: {e}")
            return [query]


class ClaimAnalyzer:
    """Assess a claim against retrieved articles via an LLM, with a keyword heuristic fallback."""

    def __init__(self, cfg: OpenRouterConfig):
        self.cfg = cfg
        # May be falsy; analyze() then returns the heuristic result directly.
        self.client = build_openrouter_client(cfg)

    def analyze(self, claim: str, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Produce a verdict dict for *claim* given the retrieved *articles*.

        Args:
            claim: The claim text to check.
            articles: Article dicts; the ``title``, ``source``, ``url`` and
                ``content`` keys are read. ``None`` is treated as empty.

        Returns:
            Without a client, the raw heuristic dict. Otherwise the LLM's JSON
            answer (or the heuristic dict if the call or parsing failed),
            passed through ``normalize_result``.
        """
        # Deduplicate articles by URL to reduce duplicates and noise.
        deduped = _dedupe_by_url(articles)

        if not self.client:
            # Heuristic fallback: simple keyword overlap scoring.
            return _heuristic_result(
                claim,
                deduped,
                "Heuristic fallback (no LLM). Confidence based on keyword overlap in retrieved articles.",
            )

        context = _format_context(deduped)
        prompt = (
            "You are an expert Pakistani fact-checker. Analyze the claim against the retrieved context.\n"
            "Return JSON ONLY. No prose. Use this exact schema keys: \n"
            "{verdict: string, confidence: number between 0 and 1, reasoning: string, supporting_evidence: string[], contradicting_evidence: string[], context_quality: string}.\n"
            "Do not include code fences. Do not include comments."
            f"\n\nNEWS CLAIM: {claim}\n\nRETRIEVED CONTEXT:\n{context}\n"
        )
        try:
            # Request structured JSON when supported.
            raw = chat_complete(
                self.client,
                self.cfg.model,
                prompt,
                temperature=self.cfg.temperature,
                max_tokens=self.cfg.max_tokens,
                response_format={"type": "json_object"},
            )
            data = _parse_json_object(raw)
        except Exception as e:
            # Robust fallback instead of returning ERROR: any failure in the
            # LLM call or in parsing its output takes the heuristic pathway.
            print(f"⚠️ JSON parse failed, falling back to heuristic: {e}")
            data = _heuristic_result(
                claim,
                deduped,
                "LLM output invalid; heuristic fallback used.",
            )
        return normalize_result(data)