"""ZEN GPT-5 production tools.

Two Gradio tabs:
  * Chat       — streaming chat completions (messages format).
  * Pro Brief  — multi-file ingestion (PDF/DOCX/TXT/MD/CSV) -> chunked
                 map-reduce summarization -> single executive deliverable.
"""

import os
from typing import List, Dict, Tuple, Union

import gradio as gr
from openai import OpenAI

# Optional parsers
import pandas as pd
from pypdf import PdfReader
from docx import Document as DocxDocument


# ===============================
# Client
# ===============================
def get_client(key: str) -> OpenAI:
    """Build an OpenAI client from a user-supplied key.

    Raises:
        gr.Error: if the key is empty/blank (surfaces as a UI toast).
    """
    key = (key or "").strip()
    if not key:
        raise gr.Error("Please enter your OpenAI API key.")
    return OpenAI(api_key=key)


# ===============================
# Chat (messages format + streaming)
# ===============================
def stream_chat(api_key: str, user_input: str, history: List[Dict]):
    """Generator: stream a chat completion token-by-token to the UI.

    Yields tuples of (chatbot messages, history state, textbox update).

    Args:
        api_key: OpenAI API key entered by the user.
        user_input: the new user message.
        history: prior conversation in OpenAI "messages" format.
    """
    client = get_client(api_key)
    history = history or []
    user_input = (user_input or "").strip()
    if not user_input:
        # BUGFIX: this function is a generator, so `return <value>` is
        # swallowed — outputs must be *yielded* to reach Gradio.
        yield history, history, gr.update(value="")
        return

    msgs = history + [{"role": "user", "content": user_input}]
    try:
        stream = client.chat.completions.create(
            model="gpt-5",
            messages=msgs,
            stream=True,
        )
        acc = ""
        for chunk in stream:
            # BUGFIX: some stream frames (e.g. a trailing usage frame) carry
            # no choices; indexing [0] unguarded raised IndexError mid-stream.
            if not chunk.choices:
                continue
            acc += chunk.choices[0].delta.content or ""
            yield msgs + [{"role": "assistant", "content": acc}], msgs, gr.update(value="")
        # Final yield commits the assistant turn into the history state.
        final_hist = msgs + [{"role": "assistant", "content": acc}]
        yield final_hist, final_hist, gr.update(value="")
    except Exception as e:
        # Best-effort surface of API errors inside the chat itself.
        err = f"[Error] {e}"
        final_hist = msgs + [{"role": "assistant", "content": err}]
        yield final_hist, final_hist, gr.update(value="")


# ===============================
# Pro Brief – File ingestion
# ===============================
TEXT_EXTS = {".txt", ".md", ".markdown"}
DOCX_EXTS = {".docx"}
PDF_EXTS = {".pdf"}
CSV_EXTS = {".csv"}


def _ext(path: str) -> str:
    """Return the lowercase file extension (including the dot)."""
    return os.path.splitext(path.lower())[1]


def _coerce_paths(files: List[Union[str, dict, gr.File]]) -> List[str]:
    """Normalize Gradio upload payloads to a list of filepath strings.

    Gradio may send:
      - list[str] of absolute filepaths (when type='filepath')
      - list[dict] with {'name': '/tmp/..'} in some versions
      - list[gr.File] objects with .name
    """
    paths = []
    for f in files or []:
        if isinstance(f, str):
            paths.append(f)
        elif isinstance(f, dict) and "name" in f:
            paths.append(f["name"])
        else:
            # gr.File or other object with .name
            name = getattr(f, "name", None)
            if name:
                paths.append(name)
    return paths


def read_text_file(fp: str) -> str:
    """Read a text file as UTF-8, falling back to latin-1 on decode errors."""
    try:
        with open(fp, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        # latin-1 never fails to decode, so this is a safe last resort.
        with open(fp, "r", encoding="latin-1") as f:
            return f.read()


def read_pdf(fp: str) -> str:
    """Extract text from every page of a PDF (empty string for imageless pages)."""
    text = []
    with open(fp, "rb") as f:
        reader = PdfReader(f)
        for page in reader.pages:
            text.append(page.extract_text() or "")
    return "\n".join(text).strip()


def read_docx(fp: str) -> str:
    """Extract paragraph text from a .docx document."""
    doc = DocxDocument(fp)
    return "\n".join(p.text for p in doc.paragraphs).strip()


def summarize_csv(fp: str) -> str:
    """Produce a textual summary of a CSV (shape, dtypes, describe(), head).

    Tries comma, then semicolon, then tab separators before giving up.

    Raises:
        gr.Error: if no separator attempt could parse the file.
    """
    read_attempts = [
        dict(),
        dict(sep=";"),
        dict(sep="\t"),
    ]
    last_err = None
    df = None
    for kwargs in read_attempts:
        try:
            df = pd.read_csv(fp, **kwargs)
            break
        except Exception as e:
            last_err = e
    if df is None:
        raise gr.Error(f"Could not read CSV: {last_err}")

    shape_info = f"Rows: {df.shape[0]}, Columns: {df.shape[1]}"
    cols = ", ".join([f"{c} ({str(df[c].dtype)})" for c in df.columns])

    try:
        desc = df.describe(include="all").transpose().fillna("").to_string()
    except Exception:
        desc = "(describe() failed for this CSV)"

    try:
        head = df.head(10).to_string(index=False)
    except Exception:
        head = "(preview failed)"

    return (
        "CSV SUMMARY\n"
        f"{shape_info}\n\n"
        f"COLUMNS & TYPES:\n{cols}\n\n"
        f"DESCRIBE():\n{desc}\n\n"
        f"FIRST 10 ROWS:\n{head}\n"
    )


def load_files(files: List[Union[str, dict, gr.File]], progress: gr.Progress) -> Tuple[str, List[str]]:
    """Parse all uploaded files into one text corpus.

    Returns:
        (joined_text, basenames) — file contents separated by '-----' markers.

    Raises:
        gr.Error: if no files were uploaded or an extension is unsupported.
    """
    paths = _coerce_paths(files)
    if not paths:
        raise gr.Error("Please upload at least one file (PDF, DOCX, TXT, MD, or CSV).")

    texts = []
    names = []
    for i, path in enumerate(paths, start=1):
        names.append(os.path.basename(path))
        ext = _ext(path)
        # Half-step before parsing, full step after, so the bar moves during work.
        progress((i - 0.5) / max(len(paths), 1), desc=f"Parsing {os.path.basename(path)}")
        if ext in TEXT_EXTS:
            texts.append(read_text_file(path))
        elif ext in PDF_EXTS:
            texts.append(read_pdf(path))
        elif ext in DOCX_EXTS:
            texts.append(read_docx(path))
        elif ext in CSV_EXTS:
            texts.append(summarize_csv(path))
        else:
            raise gr.Error(f"Unsupported file type: {ext}")
        progress(i / max(len(paths), 1), desc=f"Parsed {os.path.basename(path)}")

    return "\n\n-----\n\n".join(texts), names


# ===============================
# Pro Brief – Chunking & synthesis
# ===============================
def chunk_text(s: str, max_chars: int = 12000) -> List[str]:
    """Split text into <= max_chars chunks, preferring paragraph boundaries.

    A paragraph break ('\\n\\n') is used as the cut point only if it leaves a
    reasonably sized chunk (> start + 2000 chars); otherwise a hard cut is made.
    """
    s = (s or "").strip()
    if not s:
        return []
    if len(s) <= max_chars:
        return [s]
    chunks = []
    start = 0
    while start < len(s):
        end = min(start + max_chars, len(s))
        cut = s.rfind("\n\n", start, end)
        if cut == -1 or cut <= start + 2000:
            cut = end  # no usable paragraph break — hard cut
        chunks.append(s[start:cut])
        start = cut
    return chunks


def llm_summarize_chunks(client: OpenAI, chunks: List[str], mode: str,
                         custom_note: str, progress: gr.Progress) -> List[str]:
    """Map step: summarize each chunk independently in the requested mode.

    Progress is reported within the [0.2, 0.8] band of the overall pipeline.
    """
    summaries = []
    total = len(chunks)
    if total == 0:
        return summaries

    mode_prompt = {
        "Executive Brief": (
            "Create a crisp executive brief with sections: Context, Key Findings, Metrics, Implications, Decisions Needed."
        ),
        "Action Items": (
            "Extract actionable tasks with owners (if available), deadlines (if implied), dependencies, and priority."
        ),
        "Risks & Mitigations": (
            "Identify key risks, likelihood, impact, and concrete mitigations. Include watchpoints and triggers."
        ),
        "Meeting Minutes": (
            "Produce clean, structured minutes: Attendees (if inferable), Agenda, Discussion, Decisions, Action Items."
        ),
        "JSON Summary": (
            "Return a compact JSON with keys: context, findings[], metrics{}, actions[], risks[], decisions[]."
        ),
    }[mode]

    for i, ch in enumerate(chunks, start=1):
        progress(0.2 + 0.6 * (i - 1) / max(total, 1), desc=f"Summarizing chunk {i}/{total}")
        sys = "You are a senior analyst. Write succinctly; use bullet points where appropriate."
        usr = f"{mode_prompt}\n\n{('Additional guidance: ' + custom_note) if custom_note else ''}\n\n---\nSOURCE CHUNK {i}/{total}:\n{ch}\n"
        resp = client.chat.completions.create(
            model="gpt-5",
            messages=[{"role": "system", "content": sys},
                      {"role": "user", "content": usr}],
        )
        summaries.append(resp.choices[0].message.content.strip())
        progress(0.2 + 0.6 * i / max(total, 1), desc=f"Summarized chunk {i}/{total}")

    return summaries


def llm_synthesize_final(client: OpenAI, mode: str, names: List[str],
                         partials: List[str], custom_note: str,
                         progress: gr.Progress) -> str:
    """Reduce step: merge the per-chunk summaries into one deliverable."""
    progress(0.85, desc="Synthesizing final deliverable")
    sys = "You are a chief of staff producing board-ready output. Tight, accurate, and well-structured."
    corpus = "\n\n---\n\n".join([f"[PART {i+1}]\n{p}" for i, p in enumerate(partials)])
    usr = (
        f"Files analyzed: {', '.join(names)}\n\n"
        f"Mode: {mode}\n"
        f"{('Additional guidance: ' + custom_note) if custom_note else ''}\n\n"
        "Synthesize the PARTS into a single cohesive deliverable. If JSON mode, return only JSON."
        "\n\n---\nCORPUS (SUMMARIES):\n" + corpus
    )
    resp = client.chat.completions.create(
        model="gpt-5",
        messages=[{"role": "system", "content": sys},
                  {"role": "user", "content": usr}],
    )
    progress(0.98, desc="Finalizing")
    return resp.choices[0].message.content.strip()


def pro_brief(api_key: str, files: List[Union[str, dict, gr.File]], mode: str,
              custom_note: str,
              progress: gr.Progress = gr.Progress(track_tqdm=False)):
    """Generator: run the full document -> brief pipeline, yielding status text.

    BUGFIX: `gr.Progress` must be *injected* as a default-valued parameter for
    Gradio to wire it to the event; the original constructed one inside the
    body, which is inert (the UI never showed progress). The added parameter
    has a default, so existing 4-argument callers are unaffected.
    """
    client = get_client(api_key)

    # Stage 1: Load files
    progress(0.02, desc="Loading files")
    out = "🔎 **Loading files...**\n"
    yield out
    raw_text, names = load_files(files, progress)
    out += f"✅ Parsed {len(names)} file(s): {', '.join(names)}\n"
    yield out

    # Stage 2: Chunk
    progress(0.18, desc="Chunking text")
    chunks = chunk_text(raw_text, max_chars=12000)
    out += f"🧱 Created {len(chunks)} chunk(s) for analysis\n"
    yield out

    # Stage 3: Summarize chunks
    partials = llm_summarize_chunks(client, chunks, mode, custom_note, progress)
    out += f"🧠 Summarized {len(partials)} chunk(s)\n"
    yield out

    # Stage 4: Synthesize final
    final = llm_synthesize_final(client, mode, names, partials, custom_note, progress)

    # Done
    progress(1.0, desc="Done")
    if mode == "JSON Summary":
        # BUGFIX: the fence previously started with " ```json" — a leading
        # space before the backticks breaks Markdown code-fence rendering.
        yield "```json\n" + final + "\n```"
    else:
        yield final


# ===============================
# UI
# ===============================
with gr.Blocks(title="ZEN GPT-5 • Production Tools") as demo:
    gr.Markdown("### 🔐 Enter your OpenAI API key (not stored)")
    api_key = gr.Textbox(placeholder="sk-...", type="password", label="OpenAI API Key")

    with gr.Tab("💬 Chat"):
        chatbox = gr.Chatbot(label="GPT-5 Chat", height=420, type="messages")
        history_state = gr.State([])
        user_in = gr.Textbox(placeholder="Say hi…", label="Message")
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear Chat")

        send_btn.click(stream_chat, [api_key, user_in, history_state],
                       [chatbox, history_state, user_in], queue=True)
        user_in.submit(stream_chat, [api_key, user_in, history_state],
                       [chatbox, history_state, user_in], queue=True)
        clear_btn.click(lambda: ([], []), None, [chatbox, history_state])

    with gr.Tab("📄 Pro Brief (Docs → Executive Output)"):
        gr.Markdown(
            "Upload PDFs, DOCX, TXT, MD, or CSV. Get an **Executive Brief**, **Action Items**, "
            "**Risks & Mitigations**, **Meeting Minutes**, or a **JSON Summary**."
        )
        files = gr.File(label="Upload files", file_count="multiple", type="filepath")
        mode = gr.Radio(
            ["Executive Brief", "Action Items", "Risks & Mitigations", "Meeting Minutes", "JSON Summary"],
            value="Executive Brief",
            label="Output Mode",
        )
        custom = gr.Textbox(label="Optional guidance (tone, audience, focus areas)", lines=3,
                            placeholder="e.g., Board-ready; focus on budget impact and timeline risk.")
        run = gr.Button("Generate Pro Brief", variant="primary")
        out = gr.Markdown(label="Output", show_copy_button=True)

        # Connect generator: yields interim status + final report
        run.click(pro_brief, [api_key, files, mode, custom], out, queue=True)

    # Subtle program stamp.
    # NOTE(review): the original HTML markup was garbled in the source (tags
    # lost around the text); reconstructed minimally — confirm intended markup.
    gr.HTML(
        "<div style='text-align:center; opacity:0.6'>"
        "Module 3 – ZEN SDK Production"
        "</div>"
    )

# Enable queuing (progress & concurrency-friendly)
demo.queue(max_size=64).launch()