from __future__ import annotations
import json, re
from typing import List, Dict, Any
from .config import OpenRouterConfig
from .llm import build_openrouter_client, chat_complete
from .models import normalize_result

class QueryRewriter:
    """Expand one user query into a few search queries for Pakistani news.

    When a client is available the expansion is delegated to the LLM via
    ``chat_complete``; otherwise a fixed set of suffix variants is returned.
    """

    # Leading list numbering ("1. ", "2) ") that models tend to emit even when
    # asked not to. Whitespace is required after the marker so that values
    # such as "1.5 million" are left intact.
    _NUMBERING = re.compile(r"^\d+[.)]\s+")

    def __init__(self, cfg: OpenRouterConfig):
        """Keep *cfg* and build the client with ``build_openrouter_client``.

        A falsy client makes ``rewrite`` use its offline fallback.
        """
        self.cfg = cfg
        self.client = build_openrouter_client(cfg)

    def rewrite(self, query: str) -> List[str]:
        """Return up to three search queries derived from *query*.

        Args:
            query: The user's original search text.

        Returns:
            Without a client: the query plus " Pakistan" and " Urdu" variants,
            de-duplicated, with the original query always first.
            With a client: the first three non-empty lines of the model
            output, stripped of bullets and numbering. ``[query]`` when the
            model yields nothing usable or the call raises.
        """
        if not self.client:
            # dict.fromkeys de-duplicates while keeping insertion order; a set
            # would return the variants in hash order, which varies by run.
            return list(dict.fromkeys((query, f"{query} Pakistan", f"{query} Urdu")))
        prompt = ("Generate 3 diverse search queries for Pakistani news related to the user's query. "
                  "Consider Urdu-English variations and synonyms. Return only the queries, one per line without numbering.\n\n"
                  f"User query: {query}")
        try:
            out = chat_complete(
                self.client,
                self.cfg.model,
                prompt,
                temperature=self.cfg.temperature,
                max_tokens=min(400, self.cfg.max_tokens),
            )
            queries: List[str] = []
            for raw_line in out.splitlines():
                # Drop bullet characters first, then any "1."/"1)" prefix.
                line = raw_line.strip(" -•\t").strip()
                line = self._NUMBERING.sub("", line).strip()
                if line:
                    queries.append(line)
            return queries[:3] if queries else [query]
        except Exception as e:
            # Best effort: any failure degrades to searching the raw query.
            print(f"❌ Query rewriting error: {e}")
            return [query]

class ClaimAnalyzer:
    """Assess a news claim against a list of retrieved articles.

    With a client, the claim and article excerpts are sent to the LLM via
    ``chat_complete`` and the JSON reply is passed through
    ``normalize_result``. Without a client, or when the call or the parsing
    of its reply fails, a keyword-overlap heuristic produces the result.
    """

    # Punctuation trimmed from the edges of claim tokens so that "increase,"
    # still matches "increase" in article text. Covers ASCII punctuation,
    # curly quotes, and the Arabic/Urdu comma, semicolon, question mark and
    # full stop (written as escapes to avoid bidirectional-text confusion).
    _EDGE_PUNCT = ".,;:!?\"'()[]{}<>`\u00ab\u00bb\u201c\u201d\u2018\u2019\u060c\u061b\u061f\u06d4"

    # Greedy span from the first "{" to the last "}" in the reply; this skips
    # code fences or prose surrounding the JSON object.
    _JSON_OBJECT_SPAN = re.compile(r"\{[\s\S]*\}")

    def __init__(self, cfg: OpenRouterConfig):
        """Keep *cfg* and build the client with ``build_openrouter_client``.

        A falsy client makes ``analyze`` use the heuristic only.
        """
        self.cfg = cfg
        self.client = build_openrouter_client(cfg)

    def analyze(self, claim: str, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a verdict dict for *claim* based on *articles*.

        Args:
            claim: The statement to check.
            articles: Dicts read via the keys ``url``, ``title``, ``source``
                and ``content``; missing or ``None`` values are tolerated.

        Returns:
            Without a client: the heuristic dict itself, with keys
            ``verdict``, ``confidence``, ``reasoning``,
            ``supporting_evidence``, ``contradicting_evidence`` and
            ``context_quality``.
            With a client: ``normalize_result`` applied to the parsed LLM
            reply, or to the heuristic dict when the LLM path fails.
        """
        deduped = self._dedupe_by_url(articles)

        if not self.client:
            return self._heuristic_result(
                claim,
                deduped,
                "Heuristic fallback (no LLM). Confidence based on keyword overlap in retrieved articles.",
            )

        context = self._build_context(deduped)
        prompt = (
            "You are an expert Pakistani fact-checker. Analyze the claim against the retrieved context.\n"
            "Return JSON ONLY. No prose. Use this exact schema keys: \n"
            "{verdict: string, confidence: number between 0 and 1, reasoning: string, supporting_evidence: string[], contradicting_evidence: string[], context_quality: string}.\n"
            "Do not include code fences. Do not include comments."
            f"\n\nNEWS CLAIM: {claim}\n\nRETRIEVED CONTEXT:\n{context}\n"
        )
        try:
            # Request structured JSON when supported
            raw = chat_complete(
                self.client,
                self.cfg.model,
                prompt,
                temperature=self.cfg.temperature,
                max_tokens=self.cfg.max_tokens,
                response_format={"type": "json_object"}
            )
            data = self._parse_json_object(raw)
        except Exception as e:
            # Robust fallback instead of returning ERROR: any failure in the
            # call or in parsing its reply uses the heuristic pathway.
            print(f"⚠️ JSON parse failed, falling back to heuristic: {e}")
            data = self._heuristic_result(
                claim,
                deduped,
                "LLM output invalid; heuristic fallback used.",
            )
        return normalize_result(data)

    @staticmethod
    def _dedupe_by_url(articles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop articles whose non-empty URL was already seen, keeping order."""
        unique: List[Dict[str, Any]] = []
        seen = set()
        for article in articles:
            url = (article.get('url') or '').strip()
            # Articles without a URL cannot be compared, so all are kept.
            if url and url in seen:
                continue
            seen.add(url)
            unique.append(article)
        return unique

    @classmethod
    def _heuristic_result(cls, claim: str, articles: List[Dict[str, Any]], reasoning: str) -> Dict[str, Any]:
        """Score *claim* by keyword overlap with article content."""
        # Keywords are the distinct claim words longer than four characters,
        # lower-cased and with edge punctuation removed.
        keywords = {
            token
            for token in (word.strip(cls._EDGE_PUNCT) for word in claim.lower().split())
            if len(token) > 4
        }
        supporting: List[str] = []
        score = 0
        for article in articles:
            text = (article.get('content', '') or '').lower()
            overlap = sum(1 for keyword in keywords if keyword in text)
            if overlap:
                supporting.append(f"Match ({overlap}) in {article.get('url','')}")
                score += overlap
        # Confidence is capped at 0.6: overlap alone cannot confirm a claim.
        confidence = min(0.6, 0.1 * score) if supporting else 0.05
        verdict = "POSSIBLY TRUE" if confidence > 0.3 else "UNVERIFIED"
        return {
            "verdict": verdict,
            "confidence": confidence,
            "reasoning": reasoning,
            "supporting_evidence": supporting[:5],
            "contradicting_evidence": [],
            "context_quality": "medium" if supporting else "low",
        }

    @staticmethod
    def _build_context(articles: List[Dict[str, Any]]) -> str:
        """Render numbered article excerpts for the prompt."""
        sections = []
        for index, article in enumerate(articles, start=1):
            # "or" guards against keys present with a None value; slicing
            # None would otherwise raise before the LLM call is attempted.
            title = article.get('title') or 'Unknown'
            source = article.get('source') or 'Unknown'
            url = article.get('url') or ''
            excerpt = (article.get('content') or '')[:500]
            sections.append(
                f"Article {index}:\nTitle: {title}\nSource: {source}\nURL: {url}\nContent: {excerpt}..."
            )
        return "\n\n".join(sections)

    @classmethod
    def _parse_json_object(cls, content: Any) -> Dict[str, Any]:
        """Extract and decode the JSON object contained in *content*.

        Raises:
            ValueError: If no ``{...}`` span is present, the span is not
                valid JSON, or the decoded value is not a dict.
        """
        match = cls._JSON_OBJECT_SPAN.search(content or "")
        if match is None:
            raise ValueError("no JSON object found in LLM output")
        data = json.loads(match.group(0))
        if not isinstance(data, dict):
            raise ValueError("LLM output is not a JSON object")
        return data