ZENLLC committed
Commit 1d80b6a · verified · 1 Parent(s): d7234a0

Create app.py

Files changed (1):
  1. app.py +526 -0
app.py ADDED
@@ -0,0 +1,526 @@
+import os, io, json, zipfile, hashlib, time, tempfile
+from typing import List, Dict, Any, Optional, Tuple
+import gradio as gr
+from pydantic import BaseModel
+from tenacity import retry, stop_after_attempt, wait_exponential, RetryError
+
+# .env support (optional)
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except Exception:
+    pass
+
+# SDKs
+try:
+    from openai import OpenAI
+except Exception:
+    OpenAI = None
+
+try:
+    import anthropic
+    from anthropic import NotFoundError as AnthropicNotFound
+except Exception:
+    anthropic = None
+    AnthropicNotFound = Exception  # fallback type
+
+from firecrawl import Firecrawl  # v2.x
+
+# -------------------- utils --------------------
+def _to_dict(obj: Any) -> Any:
+    if isinstance(obj, BaseModel):
+        return obj.model_dump()
+    if isinstance(obj, dict):
+        return {k: _to_dict(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_to_dict(v) for v in obj]
+    if hasattr(obj, "__dict__") and not isinstance(obj, (str, bytes)):
+        try:
+            return {k: _to_dict(v) for k, v in vars(obj).items()}
+        except Exception:
+            pass
+    return obj
+
+def _pretty_json(data: Any, limit: int = 300_000) -> str:
+    try:
+        s = json.dumps(_to_dict(data), indent=2)
+        return s[:limit]
+    except Exception as e:
+        return f"<!> Could not serialize to JSON: {e}"
+
+def _listify(x) -> List[Any]:
+    if x is None:
+        return []
+    if isinstance(x, list):
+        return x
+    return [x]
+
+def _hash(s: str) -> str:
+    return hashlib.sha1(s.encode("utf-8")).hexdigest()[:10]
+
+# -------------------- keys --------------------
+class Keys(BaseModel):
+    openai: Optional[str] = None
+    anthropic: Optional[str] = None
+    firecrawl: Optional[str] = None
+
+def resolve_keys(s: Keys) -> Keys:
+    return Keys(
+        openai=s.openai or os.getenv("OPENAI_API_KEY"),
+        anthropic=s.anthropic or os.getenv("ANTHROPIC_API_KEY"),
+        firecrawl=s.firecrawl or os.getenv("FIRECRAWL_API_KEY"),
+    )
+
+# -------------------- firecrawl --------------------
+def fc_client(s: Keys) -> Firecrawl:
+    k = resolve_keys(s)
+    if not k.firecrawl:
+        raise gr.Error("Missing FIRECRAWL_API_KEY. Enter it in Keys → Save.")
+    return Firecrawl(api_key=k.firecrawl)
+
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=8))
+def fc_search(s: Keys, query: str, limit: int = 5, scrape_formats: Optional[List[str]] = None, location: Optional[str] = None) -> Dict[str, Any]:
+    fc = fc_client(s)
+    kwargs: Dict[str, Any] = {"query": query, "limit": limit}
+    if location: kwargs["location"] = location
+    if scrape_formats: kwargs["scrape_options"] = {"formats": scrape_formats}
+    res = fc.search(**kwargs)
+    return _to_dict(res)
+
+@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10))
+def fc_scrape(s: Keys, url: str, formats: Optional[List[str]] = None, timeout_ms: Optional[int] = None, mobile: bool = False) -> Dict[str, Any]:
+    fc = fc_client(s)
+    kwargs: Dict[str, Any] = {"url": url}
+    if formats: kwargs["formats"] = formats
+    if timeout_ms: kwargs["timeout"] = min(int(timeout_ms), 40000)  # cap 40s
+    if mobile: kwargs["mobile"] = True
+    res = fc.scrape(**kwargs)
+    return _to_dict(res)
+
+@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10))
+def fc_crawl(s: Keys, url: str, max_pages: int = 25, formats: Optional[List[str]] = None) -> Dict[str, Any]:
+    fc = fc_client(s)
+    kwargs: Dict[str, Any] = {"url": url, "limit": max_pages}
+    if formats: kwargs["scrape_options"] = {"formats": formats}
+    res = fc.crawl(**kwargs)
+    return _to_dict(res)
+
+# -------------------- LLMs --------------------
+SYSTEM_STEER = (
+    "You are ZEN's VibeCoder: extract web insights, generate clean scaffolds, "
+    "and produce production-ready artifacts. Prefer structured outlines, code blocks, and checklists. "
+    "When asked to clone or refactor, output file trees and exact text."
+)
+
+def use_openai(s: Keys):
+    k = resolve_keys(s)
+    if not k.openai: raise gr.Error("Missing OPENAI_API_KEY.")
+    if OpenAI is None: raise gr.Error("OpenAI SDK not installed.")
+    return OpenAI(api_key=k.openai)
+
+def use_anthropic(s: Keys):
+    k = resolve_keys(s)
+    if not k.anthropic: raise gr.Error("Missing ANTHROPIC_API_KEY.")
+    if anthropic is None: raise gr.Error("Anthropic SDK not installed.")
+    return anthropic.Anthropic(api_key=k.anthropic)
+
+ANTHROPIC_FALLBACKS = [
+    "claude-3-5-sonnet-20240620",
+]
+OPENAI_FALLBACKS = ["gpt-4o", "gpt-4-turbo"]
+
+def llm_once_openai(s: Keys, model: str, prompt: str, ctx: str, temp: float) -> str:
+    client = use_openai(s)
+    resp = client.chat.completions.create(
+        model=model, temperature=temp,
+        messages=[{"role": "system", "content": SYSTEM_STEER},
+                  {"role": "user", "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{ctx}"}]
+    )
+    return (resp.choices[0].message.content or "").strip()
+
+def llm_once_anthropic(s: Keys, model: str, prompt: str, ctx: str, temp: float) -> str:
+    client = use_anthropic(s)
+    resp = client.messages.create(
+        model=model, max_tokens=4000, temperature=temp, system=SYSTEM_STEER,
+        messages=[{"role": "user", "content": f"{prompt}\n\n=== SOURCE (markdown) ===\n{ctx}"}],
+    )
+    out = []
+    for blk in resp.content:
+        t = getattr(blk, "text", None)
+        if t: out.append(t)
+    return "".join(out).strip()
+
+def llm_summarize(s: Keys, provider: str, model_name: str, prompt: str, ctx_md: str, temp: float = 0.4) -> str:
+    ctx = (ctx_md or "")[:150000]
+    if provider == "openai":
+        candidates = ([model_name] + OPENAI_FALLBACKS) if model_name else OPENAI_FALLBACKS
+        last = None
+        for m in candidates:
+            try: return llm_once_openai(s, m, prompt, ctx, temp)
+            except Exception as e: last = e; continue
+        raise gr.Error(f"OpenAI failed across fallbacks: {last}")
+    else:
+        candidates = ([model_name] + ANTHROPIC_FALLBACKS) if model_name else ANTHROPIC_FALLBACKS
+        last = None
+        for m in candidates:
+            try: return llm_once_anthropic(s, m, prompt, ctx, temp)
+            except AnthropicNotFound as e: last = e; continue
+            except Exception as e: last = e; continue
+        raise gr.Error(f"Anthropic failed across fallbacks: {last}")
+
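+# Usage note (sketch): with a blank model override, llm_summarize(keys, "openai",
+# "", prompt, ctx_md) walks OPENAI_FALLBACKS in order ("gpt-4o", then "gpt-4-turbo")
+# and raises gr.Error carrying the last underlying exception if every candidate fails.
+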
+# -------------------- ZIP export helpers --------------------
+def _bytes_to_file(data: bytes, filename: str) -> str:
+    # gr.File serves downloads from disk, so spill ZIP bytes to a uniquely named temp path
+    path = os.path.join(tempfile.mkdtemp(), filename)
+    with open(path, "wb") as f:
+        f.write(data)
+    return path
+
+def pack_zip_pages(pages: List[Dict[str, Any]]) -> bytes:
+    mem = io.BytesIO()
+    with zipfile.ZipFile(mem, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
+        manifest = []
+        for i, p in enumerate(pages, start=1):
+            url = p.get("url") or p.get("metadata", {}).get("sourceURL") or f"page_{i}"
+            slug = _hash(str(url))
+            md = p.get("markdown") or p.get("data", {}).get("markdown") or p.get("content") or ""
+            html = p.get("html") or p.get("data", {}).get("html") or ""
+            links = p.get("links") or p.get("data", {}).get("links") or []
+            title = p.get("title") or p.get("metadata", {}).get("title")
+            if md: zf.writestr(f"{i:03d}_{slug}.md", md)
+            if html: zf.writestr(f"{i:03d}_{slug}.html", html)
+            manifest.append({"url": url, "title": title, "links": links})
+        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
+    mem.seek(0); return mem.read()
+
+def pack_zip_corpus(corpus: List[Dict[str, Any]], merged_md: str, extras: Dict[str, str]) -> bytes:
+    mem = io.BytesIO()
+    with zipfile.ZipFile(mem, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
+        zf.writestr("corpus_merged.md", merged_md or "")
+        zf.writestr("corpus_manifest.json", json.dumps(corpus, indent=2))
+        for name, content in extras.items():
+            zf.writestr(name, content)
+    mem.seek(0); return mem.read()
+
+# -------------------- actions: keys/search/scrape/crawl/generate --------------------
+def save_keys(openai_key, anthropic_key, firecrawl_key):
+    keys = Keys(
+        openai=(openai_key or "").strip() or None,
+        anthropic=(anthropic_key or "").strip() or None,
+        firecrawl=(firecrawl_key or "").strip() or None,
+    )
+    # return the message as text so it persists in the save_msg Markdown output
+    return keys, "Keys saved to this session. (Env vars still apply if set.)"
+
+def action_search(sess: Keys, query: str, limit: int, scrape_content: bool, location: str):
+    if not query.strip(): raise gr.Error("Enter a search query.")
+    formats = ["markdown", "links"] if scrape_content else None
+    res = fc_search(sess, query=query.strip(), limit=limit, scrape_formats=formats, location=(location or None))
+    data = res.get("data", res)
+    items: List[Any] = []
+    if isinstance(data, dict):
+        for bucket in ("web", "news", "images", "videos", "discussion"):
+            b = data.get(bucket)
+            if b:
+                items.extend(_listify(_to_dict(b)))
+    elif isinstance(data, list):
+        items = _to_dict(data)
+    else:
+        items = _listify(_to_dict(data))
+    if not items:
+        return _pretty_json(res), res  # return raw and obj (store for later)
+    return json.dumps(items, indent=2), items
+
+def action_scrape(sess: Keys, url: str, mobile: bool, formats_sel: List[str], timeout_ms: int):
+    if not url.strip(): raise gr.Error("Enter a URL.")
+    formats = formats_sel or ["markdown", "links"]
+    try:
+        out = fc_scrape(sess, url.strip(), formats=formats, timeout_ms=(timeout_ms or 15000), mobile=mobile)
+        pretty = _pretty_json(out)
+        md = out.get("markdown") or out.get("data", {}).get("markdown") or out.get("content") or ""
+        return pretty, md, out
+    except RetryError as e:
+        return f"<!> Scrape timed out after retries. Try increasing timeout, unchecking 'mobile', or limiting formats.\n\n{e}", "", {}
+    except Exception as e:
+        return f"<!> Scrape error: {e}", "", {}
+
+def action_crawl(sess: Keys, base_url: str, max_pages: int, formats_sel: List[str]):
+    if not base_url.strip(): raise gr.Error("Enter a base URL to crawl.")
+    formats = formats_sel or ["markdown", "links"]
+    try:
+        out = fc_crawl(sess, base_url.strip(), max_pages=max_pages, formats=formats)
+        pages = out.get("data")
+        if not isinstance(pages, list) or not pages: raise gr.Error("Crawl returned no pages.")
+        zip_bytes = pack_zip_pages(pages)
+        return gr.update(value=_bytes_to_file(zip_bytes, "site_clone.zip"), visible=True), f"Crawled {len(pages)} pages. ZIP is ready.", pages
+    except RetryError as e:
+        return gr.update(visible=False), f"<!> Crawl timed out after retries. Reduce Max Pages or try again.\n\n{e}", []
+    except Exception as e:
+        return gr.update(visible=False), f"<!> Crawl error: {e}", []
+
+def action_generate(sess: Keys, provider: str, model_name: str, sys_prompt: str, user_prompt: str, context_md: str, temp: float):
+    if not user_prompt.strip(): raise gr.Error("Enter a prompt or click a starter tile.")
+    model = (model_name or "").strip()
+    steer = (sys_prompt or "").strip()
+    prompt = (("SYSTEM:\n" + steer + "\n\n") if steer else "") + user_prompt.strip()
+    out = llm_summarize(sess, provider, model, prompt, context_md or "", temp=temp)
+    return out
+
+# -------------------- Corpus features --------------------
+def corpus_normalize_items(items: Any) -> List[Dict[str, Any]]:
+    """Accepts list/dict/raw and returns a list of page-like dicts with url/title/markdown/html/links."""
+    out = []
+    if isinstance(items, dict): items = [items]
+    for it in _listify(items):
+        d = _to_dict(it)
+        if not isinstance(d, dict): continue
+        url = d.get("url") or d.get("metadata", {}).get("sourceURL") or d.get("link") or ""
+        title = d.get("title") or d.get("metadata", {}).get("title") or d.get("name") or ""
+        md = d.get("markdown") or d.get("data", {}).get("markdown") or d.get("content") or ""
+        html = d.get("html") or d.get("data", {}).get("html") or ""
+        links = d.get("links") or d.get("data", {}).get("links") or []
+        out.append({"url": url, "title": title, "markdown": md, "html": html, "links": links})
+    return out
+
+def corpus_add(corpus: List[Dict[str, Any]], items: Any, include_filter: str, exclude_filter: str, dedupe: bool) -> Tuple[List[Dict[str, Any]], str]:
+    added = 0
+    existing = set(_hash(x.get("url", "")) for x in corpus if x.get("url"))
+    inc = (include_filter or "").strip().lower()
+    exc = (exclude_filter or "").strip().lower()
+    for rec in corpus_normalize_items(items):
+        url = (rec.get("url") or "").lower()
+        title = (rec.get("title") or "").lower()
+        if inc and (inc not in url and inc not in title): continue
+        if exc and (exc in url or exc in title): continue
+        if dedupe and rec.get("url") and _hash(rec["url"]) in existing: continue
+        corpus.append(rec); added += 1
+        if rec.get("url"): existing.add(_hash(rec["url"]))
+    return corpus, f"Added {added} item(s). Corpus size: {len(corpus)}."
+
+def corpus_list(corpus: List[Dict[str, Any]]) -> str:
+    lines = []
+    for i, rec in enumerate(corpus, 1):
+        url = rec.get("url") or "(no url)"
+        title = rec.get("title") or "(no title)"
+        mlen = len(rec.get("markdown") or "")
+        lines.append(f"{i:03d}. {title} — {url} [md:{mlen} chars]")
+    if not lines: return "_(empty)_"
+    return "\n".join(lines)
+
+def corpus_clear() -> Tuple[List[Dict[str, Any]], str]:
+    return [], "Corpus cleared."
+
+def corpus_merge_md(corpus: List[Dict[str, Any]]) -> str:
+    parts = []
+    for rec in corpus:
+        hdr = f"### {rec.get('title') or rec.get('url') or 'Untitled'}"
+        md = rec.get("markdown") or ""
+        if md: parts.append(hdr + "\n\n" + md.strip())
+    return "\n\n---\n\n".join(parts)
+
+def corpus_export(corpus: List[Dict[str, Any]], merged: str, extras: Dict[str, str]):
+    data = pack_zip_corpus(corpus, merged, extras)
+    return gr.update(value=_bytes_to_file(data, f"corpus_{int(time.time())}.zip"), visible=True)
+
+def dual_generate(sess: Keys, model_openai: str, model_anthropic: str, sys_prompt: str, user_prompt: str, ctx_md: str, temp: float):
+    if not user_prompt.strip(): raise gr.Error("Enter a prompt or use a tile.")
+    steer = (sys_prompt or "").strip()
+    prompt = (("SYSTEM:\n" + steer + "\n\n") if steer else "") + user_prompt.strip()
+    ctx = ctx_md or ""
+    oa_txt, an_txt = "", ""
+    # OpenAI
+    try:
+        oa_txt = llm_summarize(sess, "openai", model_openai or "", prompt, ctx, temp)
+    except Exception as e:
+        oa_txt = f"<!> OpenAI error: {e}"
+    # Anthropic
+    try:
+        an_txt = llm_summarize(sess, "anthropic", model_anthropic or "", prompt, ctx, temp)
+    except Exception as e:
+        an_txt = f"<!> Anthropic error: {e}"
+    # render side-by-side
+    md = (
+        "### OpenAI\n\n" + (oa_txt or "_(empty)_") +
+        "\n\n---\n\n" +
+        "### Anthropic\n\n" + (an_txt or "_(empty)_")
+    )
+    return md
+
+def scaffold_from_corpus(corpus_md: str, site_name: str = "zen-scan"):
+    """
+    Produce a tiny site/docs scaffold as a ZIP:
+      /README.md
+      /docs/index.md   (from corpus)
+      /docs/summary.md (brief)
+    """
+    summary = (corpus_md[:1800] + ("..." if len(corpus_md) > 1800 else "")) if corpus_md else "No content."
+    mem = io.BytesIO()
+    with zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED) as zf:
+        zf.writestr("README.md", f"# {site_name}\n\nAuto-generated scaffold from ZEN VibeCoder corpus.\n")
+        zf.writestr("docs/index.md", corpus_md or "# Empty\n")
+        zf.writestr("docs/summary.md", f"# Summary\n\n{summary}\n")
+    mem.seek(0)
+    return gr.update(value=_bytes_to_file(mem.read(), f"{site_name}_scaffold.zip"), visible=True)
+
+# -------------------- UI --------------------
+with gr.Blocks(css="#keys .wrap.svelte-1ipelgc { filter: none !important; }") as demo:
+    gr.Markdown("## ZEN VibeCoder — Web Clone & Research Foundry")
+    session_state = gr.State(Keys())
+
+    # keep stateful objects
+    last_search_obj = gr.State({})
+    last_scrape_obj = gr.State({})
+    last_crawl_pages = gr.State([])
+    corpus_state = gr.State([])    # list of dicts
+    merged_md_state = gr.State("") # merged markdown cache
+
+    with gr.Accordion("🔐 Keys (session)", open=True):
+        with gr.Row():
+            openai_key = gr.Textbox(label="OPENAI_API_KEY (GPT-4o / fallbacks)", type="password", placeholder="sk-...", value=os.getenv("OPENAI_API_KEY") or "")
+            anthropic_key = gr.Textbox(label="ANTHROPIC_API_KEY (Claude Sonnet)", type="password", placeholder="anthropic-key...", value=os.getenv("ANTHROPIC_API_KEY") or "")
+            firecrawl_key = gr.Textbox(label="FIRECRAWL_API_KEY", type="password", placeholder="fc-...", value=os.getenv("FIRECRAWL_API_KEY") or "")
+        save_btn = gr.Button("Save keys", variant="primary")
+        save_msg = gr.Markdown()
+        save_btn.click(save_keys, [openai_key, anthropic_key, firecrawl_key], [session_state, save_msg])
+
+    with gr.Tabs():
+        # --- SEARCH ---
+        with gr.Tab("🔎 Search"):
+            query = gr.Textbox(label="Query", placeholder='ex: site:docs "vector database" 2025')
+            with gr.Row():
+                limit = gr.Slider(1, 20, value=6, step=1, label="Limit")
+                scrape_content = gr.Checkbox(label="Also scrape results (markdown + links)", value=True)
+                location = gr.Textbox(label="Location (optional)", placeholder="ex: Germany")
+            go_search = gr.Button("Run Search", variant="primary")
+            search_json = gr.Code(label="Results JSON", language="json")
+
+            def _search(sess, q, lmt, scp, loc):
+                txt, obj = action_search(sess, q, lmt, scp, loc)
+                return txt, obj
+            go_search.click(_search, [session_state, query, limit, scrape_content, location], [search_json, last_search_obj])
+
+        # --- SCRAPE / CRAWL ---
+        with gr.Tab("🕸️ Scrape • Crawl • Clone"):
+            with gr.Row():
+                target_url = gr.Textbox(label="URL to Scrape", placeholder="https://example.com")
+                timeout_ms = gr.Number(label="Timeout (ms, max 40000)", value=15000)
+            with gr.Row():
+                formats_sel = gr.CheckboxGroup(choices=["markdown", "html", "links", "screenshot"], value=["markdown", "links"], label="Formats")
+                mobile = gr.Checkbox(label="Emulate mobile", value=False)
+            run_scrape = gr.Button("Scrape URL", variant="primary")
+            scrape_json = gr.Code(label="Raw Response (JSON)", language="json")
+            scrape_md = gr.Markdown(label="Markdown Preview")
+            run_scrape.click(action_scrape, [session_state, target_url, mobile, formats_sel, timeout_ms], [scrape_json, scrape_md, last_scrape_obj])
+
+            gr.Markdown("---")
+
+            with gr.Row():
+                base_url = gr.Textbox(label="Base URL to Crawl", placeholder="https://docs.firecrawl.dev")
+                max_pages = gr.Slider(1, 200, value=25, step=1, label="Max Pages")
+                formats_crawl = gr.CheckboxGroup(choices=["markdown", "html", "links"], value=["markdown", "links"], label="Crawl Formats")
+            run_crawl = gr.Button("Crawl & Build ZIP", variant="primary")
+            zip_file = gr.File(label="Clone ZIP", visible=False)
+            crawl_status = gr.Markdown()
+            run_crawl.click(action_crawl, [session_state, base_url, max_pages, formats_crawl], [zip_file, crawl_status, last_crawl_pages])
+
+        # --- CORPUS & BUILD ---
+        with gr.Tab("📦 Corpus & Build"):
+            with gr.Row():
+                include_filter = gr.Textbox(label="Include filter (substring)", placeholder="docs, api, blog...")
+                exclude_filter = gr.Textbox(label="Exclude filter (substring)", placeholder="cdn, tracking, terms...")
+                dedupe = gr.Checkbox(label="Dedupe by URL", value=True)
+            with gr.Row():
+                add_from_search = gr.Button("Add from Last Search")
+                add_from_scrape = gr.Button("Add from Last Scrape")
+                add_from_crawl = gr.Button("Add from Last Crawl")
+            status_corpus = gr.Markdown()
+            corpus_list_md = gr.Markdown(label="Corpus Items")
+
+            def do_add_from_search(corpus, items, inc, exc, dd):
+                corpus, msg = corpus_add(corpus or [], items, inc, exc, dd)
+                return corpus, msg, corpus_list(corpus)
+            def do_add_from_scrape(corpus, obj, inc, exc, dd):
+                corpus, msg = corpus_add(corpus or [], obj, inc, exc, dd)
+                return corpus, msg, corpus_list(corpus)
+            def do_add_from_crawl(corpus, pages, inc, exc, dd):
+                corpus, msg = corpus_add(corpus or [], pages, inc, exc, dd)
+                return corpus, msg, corpus_list(corpus)
+
+            add_from_search.click(do_add_from_search, [corpus_state, last_search_obj, include_filter, exclude_filter, dedupe], [corpus_state, status_corpus, corpus_list_md])
+            add_from_scrape.click(do_add_from_scrape, [corpus_state, last_scrape_obj, include_filter, exclude_filter, dedupe], [corpus_state, status_corpus, corpus_list_md])
+            add_from_crawl.click(do_add_from_crawl, [corpus_state, last_crawl_pages, include_filter, exclude_filter, dedupe], [corpus_state, status_corpus, corpus_list_md])
+
+            with gr.Row():
+                merge_btn = gr.Button("Merge ➜ Markdown", variant="primary")
+                clear_btn = gr.Button("Clear Corpus", variant="secondary")
+            merged_md = gr.Textbox(label="Merged Markdown (editable)", lines=12)
+
+            def do_merge(corpus):
+                md = corpus_merge_md(corpus or [])
+                return md, md
+            def do_clear():
+                c, msg = corpus_clear()
+                return c, msg, corpus_list(c), ""
+            merge_btn.click(do_merge, [corpus_state], [merged_md, merged_md_state])
+            clear_btn.click(do_clear, [], [corpus_state, status_corpus, corpus_list_md, merged_md])
+
+            gr.Markdown("---")
+            with gr.Row():
+                site_name = gr.Textbox(label="Scaffold Name", value="zen-scan")
+                scaffold_btn = gr.Button("Generate Minimal Site Scaffold (ZIP)")
+            scaffold_zip = gr.File(visible=False)
+            # the lambda takes two args, so site_name must be passed as a second input
+            scaffold_btn.click(lambda md, name: scaffold_from_corpus(md, name or "zen-scan"),
+                               [merged_md, site_name], [scaffold_zip])
+
+            gr.Markdown("---")
+            with gr.Row():
+                export_zip_btn = gr.Button("Export Corpus (ZIP)")
+                export_zip_file = gr.File(visible=False)
+
+            def do_export(corpus, merged):
+                extras = {"README.txt": "Exported by ZEN VibeCoder"}
+                return corpus_export(corpus or [], merged or "", extras)
+            export_zip_btn.click(do_export, [corpus_state, merged_md], [export_zip_file])
+
+        # --- VIBE CODE (single provider) ---
+        with gr.Tab("✨ Vibe Code (Synthesis)"):
+            with gr.Row():
+                provider = gr.Radio(choices=["openai", "anthropic"], value="openai", label="Provider")
+                model_name = gr.Textbox(label="Model (override)", placeholder="(blank = auto fallback)")
+                temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature")
+            sys_prompt = gr.Textbox(label="System Style (optional)",
+                                    value="Return structured outputs with file trees, code blocks and ordered steps. Be concise and concrete.")
+            user_prompt = gr.Textbox(label="User Prompt", lines=6)
+            ctx_md = gr.Textbox(label="Context (paste markdown or click Merge first)", lines=10)
+            gen_btn = gr.Button("Generate", variant="primary")
+            out_md = gr.Markdown()
+            gr.Markdown("**Starter Tiles**")
+            with gr.Row():
+                t1 = gr.Button("🔧 Clone Docs ➜ Clean README")
+                t2 = gr.Button("🧭 Competitor Matrix")
+                t3 = gr.Button("🧪 Python API Client")
+                t4 = gr.Button("📐 ZEN Landing Rewrite")
+                t5 = gr.Button("📊 Dataset & ETL Plan")
+            def fill_tile(tile: str):
+                tiles = {
+                    "t1": "Create a clean knowledge pack from the context, then output a README.md with: Overview, Key features, Quickstart, API endpoints, Notes & gotchas, License. Include a /docs/ outline.",
+                    "t2": "Produce a feature matrix, pricing table, ICP notes, moats/risks, and a market POV. End with a ZEN playbook: 5 lever moves.",
+                    "t3": "Design a Python client that wraps the target API with retry/backoff and typed responses. Provide package layout, requirements, client.py, examples/, and README.",
+                    "t4": "Rewrite the landing content in ZEN brand voice: headline, 3 value props, social proof, CTA, concise FAQ. Provide HTML sections and copy.",
+                    "t5": "Propose a dataset schema. Output a table of fields, types, constraints, plus an ETL plan (sources, transforms, validation, freshness, monitoring).",
+                }
+                return tiles[tile]
+            t1.click(lambda: fill_tile("t1"), outputs=[user_prompt])
+            t2.click(lambda: fill_tile("t2"), outputs=[user_prompt])
+            t3.click(lambda: fill_tile("t3"), outputs=[user_prompt])
+            t4.click(lambda: fill_tile("t4"), outputs=[user_prompt])
+            t5.click(lambda: fill_tile("t5"), outputs=[user_prompt])
+            gen_btn.click(action_generate, [session_state, provider, model_name, sys_prompt, user_prompt, ctx_md, temp], [out_md])
+
+        # --- DUAL (side-by-side router) ---
+        with gr.Tab("🧪 Dual Synth (OpenAI vs Anthropic)"):
+            with gr.Row():
+                model_openai = gr.Textbox(label="OpenAI Model", placeholder="(blank = auto fallback)")
+                model_anthropic = gr.Textbox(label="Anthropic Model", placeholder="(blank = auto fallback)")
+                temp2 = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature")
+            sys2 = gr.Textbox(label="System Style (optional)", value="Return structured outputs with file trees and clear steps.")
+            user2 = gr.Textbox(label="User Prompt", lines=6, value="Summarize the corpus and propose a 5-step execution plan.")
+            ctx2 = gr.Textbox(label="Context (tip: click Merge in Corpus tab)", lines=10)
+            dual_btn = gr.Button("Run Dual Synthesis", variant="primary")
+            dual_md = gr.Markdown()
+            dual_btn.click(dual_generate, [session_state, model_openai, model_anthropic, sys2, user2, ctx2, temp2], [dual_md])
+
+    gr.Markdown("Built for **ZEN Arena** pipelines. Export ZIPs → ingest → credentialize via ZEN Cards.")
+
+if __name__ == "__main__":
+    demo.launch(ssr_mode=False)
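
A minimal smoke test for the Firecrawl helpers outside the Gradio UI, assuming the file is saved as app.py, the imported dependencies are installed, and FIRECRAWL_API_KEY is exported (the URL is a placeholder):

from app import Keys, fc_scrape

# An empty Keys() makes resolve_keys() fall back to environment variables.
out = fc_scrape(Keys(), "https://example.com", formats=["markdown"])
# Depending on the SDK response shape, markdown may sit at the top level or under "data".
md = out.get("markdown") or out.get("data", {}).get("markdown") or ""
print(md[:300])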