image-generation-editing

Paused

App Files Files Community

philschmid committed on Mar 16

Commit

faee5d2

unverified ·

1 Parent(s): 1adcf06

init

Browse files

Files changed (18) hide show

LICENSE +201 -0
README.md +77 -14
app/api/extract/route.ts +0 -51
app/api/image/route.ts +184 -0
app/api/schema/route.ts +0 -147
app/globals.css +2 -1
app/layout.tsx +2 -2
app/page.tsx +104 -39
components/{PromptInput.tsx → ImagePromptInput.tsx} +24 -13
components/ImageResultDisplay.tsx +111 -0
components/{FileUpload.tsx → ImageUpload.tsx} +72 -37
components/PdfViewer.tsx +0 -77
components/ResultDisplay.tsx +0 -78
lib/types.ts +21 -0
next.config.ts +1 -1
package-lock.json +4 -4
package.json +1 -2
tsconfig.json +1 -1

LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md CHANGED Viewed

@@ -1,14 +1,77 @@
----
-title: Pdf To Structured Data
-emoji: 🌍
-colorFrom: purple
-colorTo: gray
-sdk: docker
-header: mini
-app_port: 3000
-pinned: false
-license: apache-2.0
-short_description: PDF to Structured Data powered by Google DeepMind Gemini 2.0
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Image Creation & Editing with Next.js and Gemini 2.0 Flash
+This project demonstrates how to create and edit images using Google's Gemini 2.0 Flash AI model in a Next.js web application. It allows users to generate images from text prompts or edit existing images through natural language instructions, maintaining conversation context for iterative refinements.
+**How It Works:**
+1. **Create Images**: Generate images from text prompts using Gemini 2.0 Flash
+2. **Edit Images**: Upload an image and provide instructions to modify it
+3. **Conversation History**: Maintain context through a conversation with the AI for iterative refinements
+4. **Download Results**: Save your generated or edited images
+## Features
+- 🎨 Text-to-image generation with Gemini 2.0 Flash
+- 🖌️ Image editing through natural language instructions
+- 💬 Conversation history for context-aware image refinements
+- 📱 Responsive UI built with Next.js and shadcn/ui
+- 🔄 Seamless workflow between creation and editing modes
+- ⚡ Uses the Gemini 2.0 Flash JavaScript SDK
+## Getting Started
+### Local Development
+First, set up your environment variables:
+```bash
+cp .env.example .env
+```
+Add your Google AI Studio API key to the `.env` file:
+```
+GEMINI_API_KEY=your_google_api_key
+```
+Then, install dependencies and run the development server:
+```bash
+npm install
+npm run dev
+```
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the application.
+### Docker Deployment
+1. Build the Docker image:
+```bash
+docker build -t nextjs-gemini-image-editing .
+```
+2. Run the container with your Google API key:
+```bash
+docker run -p 3000:3000 -e GEMINI_API_KEY=your_google_api_key nextjs-gemini-image-editing
+```
+Or using an environment file:
+```bash
+# Run container with env file
+docker run -p 3000:3000 --env-file .env nextjs-gemini-image-editing
+```
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the application.
+## Technologies Used
+- [Next.js](https://nextjs.org/) - React framework for the web application
+- [Google Gemini 2.0 Flash](https://deepmind.google/technologies/gemini/) - AI model for image generation and editing
+- [shadcn/ui](https://ui.shadcn.com/) - Re-usable components built using Radix UI and Tailwind CSS
+## License
+This project is licensed under the Apache License 2.0 - see the [LICENSE](./LICENSE) file for details.

app/api/extract/route.ts DELETED Viewed

@@ -1,51 +0,0 @@
-import { NextResponse } from "next/server";
-import { GoogleGenerativeAI } from "@google/generative-ai";
-const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
-const MODEL_ID = "gemini-2.0-flash";
-export async function POST(request: Request) {
-  try {
-    const formData = await request.formData();
-    const file = formData.get("file") as File;
-    const schema = JSON.parse(formData.get("schema") as string);
-    // Convert PDF to base64
-    const buffer = await file.arrayBuffer();
-    const base64 = Buffer.from(buffer).toString("base64");
-    const model = genAI.getGenerativeModel({
-      model: MODEL_ID,
-      generationConfig: {
-        responseMimeType: "application/json",
-        responseSchema: schema,
-      },
-    });
-    const prompt = "Extract the structured data from the following PDF file";
-    const result = await model.generateContent([
-      prompt,
-      {
-        inlineData: {
-          mimeType: "application/pdf",
-          data: base64,
-        },
-      },
-    ]);
-    const response = await result.response;
-    const extractedData = JSON.parse(response.text());
-    return NextResponse.json(extractedData);
-  } catch (error) {
-    console.error("Error extracting data:", error);
-    return NextResponse.json(
-      {
-        error:
-          "Failed to extract data, open a thread in discussions, could be be a rate limit issue.s",
-      },
-      { status: 500 }
-    );
-  }
-}

app/api/image/route.ts ADDED Viewed

	@@ -0,0 +1,184 @@

+import { NextRequest, NextResponse } from "next/server";
+import { GoogleGenerativeAI } from "@google/generative-ai";
+import { HistoryItem, HistoryPart } from "@/lib/types";
+// Initialize the Google Gen AI client with your API key
+const GEMINI_API_KEY = process.env.GEMINI_API_KEY || "";
+const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
+// Define the model ID for Gemini 2.0 Flash experimental
+const MODEL_ID = "gemini-2.0-flash-exp";
+// Define interface for the formatted history item
+interface FormattedHistoryItem {
+  role: "user" | "model";
+  parts: Array<{
+    text?: string;
+    inlineData?: { data: string; mimeType: string };
+  }>;
+}
+export async function POST(req: NextRequest) {
+  try {
+    // Parse JSON request instead of FormData
+    const requestData = await req.json();
+    const { prompt, image: inputImage, history } = requestData;
+    if (!prompt) {
+      return NextResponse.json(
+        { error: "Prompt is required" },
+        { status: 400 }
+      );
+    }
+    // Get the model with the correct configuration
+    const model = genAI.getGenerativeModel({
+      model: MODEL_ID,
+      generationConfig: {
+        temperature: 1,
+        topP: 0.95,
+        topK: 40,
+        // @ts-expect-error - Gemini API JS is missing this type
+        responseModalities: ["Text", "Image"],
+      },
+    });
+    let result;
+    try {
+      // Convert history to the format expected by Gemini API
+      const formattedHistory =
+        history && history.length > 0
+          ? history
+              .map((item: HistoryItem) => {
+                return {
+                  role: item.role,
+                  parts: item.parts
+                    .map((part: HistoryPart) => {
+                      if (part.text) {
+                        return { text: part.text };
+                      }
+                      if (part.image && item.role === "user") {
+                        const imgParts = part.image.split(",");
+                        if (imgParts.length > 1) {
+                          return {
+                            inlineData: {
+                              data: imgParts[1],
+                              mimeType: part.image.includes("image/png")
+                                ? "image/png"
+                                : "image/jpeg",
+                            },
+                          };
+                        }
+                      }
+                      return { text: "" };
+                    })
+                    .filter((part) => Object.keys(part).length > 0), // Remove empty parts
+                };
+              })
+              .filter((item: FormattedHistoryItem) => item.parts.length > 0) // Remove items with no parts
+          : [];
+      // Create a chat session with the formatted history
+      const chat = model.startChat({
+        history: formattedHistory,
+      });
+      // Prepare the current message parts
+      const messageParts = [];
+      // Add the text prompt
+      messageParts.push({ text: prompt });
+      // Add the image if provided
+      if (inputImage) {
+        // For image editing
+        console.log("Processing image edit request");
+        // Check if the image is a valid data URL
+        if (!inputImage.startsWith("data:")) {
+          throw new Error("Invalid image data URL format");
+        }
+        const imageParts = inputImage.split(",");
+        if (imageParts.length < 2) {
+          throw new Error("Invalid image data URL format");
+        }
+        const base64Image = imageParts[1];
+        const mimeType = inputImage.includes("image/png")
+          ? "image/png"
+          : "image/jpeg";
+        console.log(
+          "Base64 image length:",
+          base64Image.length,
+          "MIME type:",
+          mimeType
+        );
+        // Add the image to message parts
+        messageParts.push({
+          inlineData: {
+            data: base64Image,
+            mimeType: mimeType,
+          },
+        });
+      }
+      // Send the message to the chat
+      console.log("Sending message with", messageParts.length, "parts");
+      result = await chat.sendMessage(messageParts);
+    } catch (error) {
+      console.error("Error in chat.sendMessage:", error);
+      throw error;
+    }
+    const response = result.response;
+    let textResponse = null;
+    let imageData = null;
+    let mimeType = "image/png";
+    // Process the response
+    if (response.candidates && response.candidates.length > 0) {
+      const parts = response.candidates[0].content.parts;
+      console.log("Number of parts in response:", parts.length);
+      for (const part of parts) {
+        if ("inlineData" in part && part.inlineData) {
+          // Get the image data
+          imageData = part.inlineData.data;
+          mimeType = part.inlineData.mimeType || "image/png";
+          console.log(
+            "Image data received, length:",
+            imageData.length,
+            "MIME type:",
+            mimeType
+          );
+        } else if ("text" in part && part.text) {
+          // Store the text
+          textResponse = part.text;
+          console.log(
+            "Text response received:",
+            textResponse.substring(0, 50) + "..."
+          );
+        }
+      }
+    }
+    // Return just the base64 image and description as JSON
+    return NextResponse.json({
+      image: imageData ? `data:${mimeType};base64,${imageData}` : null,
+      description: textResponse,
+    });
+  } catch (error) {
+    console.error("Error generating image:", error);
+    return NextResponse.json(
+      {
+        error: "Failed to generate image",
+        details: error instanceof Error ? error.message : String(error),
+      },
+      { status: 500 }
+    );
+  }
+}

app/api/schema/route.ts DELETED Viewed

@@ -1,147 +0,0 @@
-import { NextResponse } from "next/server";
-import { GoogleGenerativeAI } from "@google/generative-ai";
-const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
-const MODEL_ID = "gemini-2.0-flash";
-const META_PROMPT = `
-You are a JSON Schema expert. Your task is to create JSON schema baed on the user input. The schema will be used for extra data.
-You must also make sure:
-- All fields in an object are set as required
-- All objects must have properties defined
-- Order matters! If the values are dependent or would require additional information, make sure to include the additional information in the description. Same counts for "reasoning" or "thinking" should come before the conclusion.
-- $defs must be defined under the schema param
-- Return only the schema JSON not more, use \`\`\`json to start and \`\`\` to end the JSON schema
-Restrictions:
-- You cannot use examples, if you think examples are helpful include them in the description.
-- You cannot use default values, If you think default are helpful include them in the description.
-- Top level cannot have a "title" property only "description"
-- You cannot use $defs, directly in the schema, don't use any $defs and $ref in the schema. Directly define the schema in the properties.
-- Never include a $schema
-- The "type" needs to be a single value, no arrays
-Guidelines:
-- If the user prompt is short define a single object schema and fields based on your knowledge.
-- If the user prompt is in detail about the data only use the data in the schema. Don't add more fields than the user asked for.
-Examples:
-Input: Cookie Recipes
-Output: \`\`\`json
-{
-    "description": "Schema for a cookie recipe, including ingredients and quantities. The 'ingredients' array lists each ingredient along with its corresponding quantity and unit of measurement. The 'instructions' array provides a step-by-step guide to preparing the cookies. The order of instructions is important.",
-    "type": "object",
-    "properties": {
-       "name": {
-          "type": "string",
-          "description": "The name of the cookie recipe."
-       },
-       "description": {
-          "type": "string",
-          "description": "A short description of the cookie, including taste and textures."
-       },
-       "ingredients": {
-          "type": "array",
-          "description": "A list of ingredients required for the recipe.",
-          "items": {
-             "type": "object",
-             "description": "An ingredient with its quantity and unit.",
-             "properties": {
-                "name": {
-                   "type": "string",
-                   "description": "The name of the ingredient (e.g., flour, sugar, butter)."
-                },
-                "quantity": {
-                   "type": "number",
-                   "description": "The amount of the ingredient needed."
-                },
-                "unit": {
-                   "type": "string",
-                   "description": "The unit of measurement for the ingredient (e.g., cups, grams, teaspoons). Use abbreviations like 'tsp' for teaspoon and 'tbsp' for tablespoon."
-                }
-             },
-             "required": [
-                "name",
-                "quantity",
-                "unit"
-             ]
-          }
-       },
-       "instructions": {
-          "type": "array",
-          "description": "A sequence of steps to prepare the cookie recipe. The order of instructions matters.",
-          "items": {
-             "type": "string",
-             "description": "A single instruction step."
-          }
-       }
-    },
-    "required": [
-       "name",
-       "description",
-       "ingredients",
-       "instructions"
-    ]
-}
-\`\`\`
-Input: Book with title, author, and publication year.
-Output: \`\`\`json
-{
-    "type": "object",
-    "properties": {
-        "title": {
-            "type": "string",
-            "description": "The title of the book."
-        },
-        "author": {
-            "type": "string",
-            "description": "The author of the book."
-        },
-        "publicationYear": {
-            "type": "integer",
-            "description": "The year the book was published."
-        }
-    },
-    "required": [
-        "title",
-        "author",
-        "publicationYear"
-    ],
-}
-\`\`\`
-Input: {USER_PROMPT}`.trim();
-export async function POST(request: Request) {
-  try {
-    // Get the prompt from the request body
-    const { prompt } = await request.json();
-    // Get the model
-    const model = genAI.getGenerativeModel({ model: MODEL_ID });
-    // Generate the content
-    const result = await model.generateContent(
-      META_PROMPT.replace("{USER_PROMPT}", prompt)
-    );
-    // Get the response
-    const response = await result.response;
-    // Remove markdown code block markers if present
-    const jsonString = response
-      .text()
-      .replace(/^```json\n?/, "")
-      .replace(/\n?```$/, "");
-    // Return the schema
-    return NextResponse.json({ schema: JSON.parse(jsonString) });
-  } catch (error) {
-    console.error("Error generating schema:", error);
-    return NextResponse.json(
-      {
-        error:
-          "Failed to generate schema, open a thread in discussions, could be be a rate limit issue.",
-      },
-      { status: 500 }
-    );
-  }
-}

app/globals.css CHANGED Viewed

@@ -80,4 +80,5 @@ body {
 h1,h2,h3,h4,h5,h6 {
   @apply text-foreground dark:text-foreground;
-}

 h1,h2,h3,h4,h5,h6 {
   @apply text-foreground dark:text-foreground;
+}

app/layout.tsx CHANGED Viewed

@@ -11,8 +11,8 @@ const openSans = Open_Sans({
 });
 export const metadata: Metadata = {
-  title: "PDF Extractor",
-  description: "Extract data from PDFs using Google DeepMind Gemini 2.0",
 };
 export const viewport: Viewport = {

 });
 export const metadata: Metadata = {
+  title: "Image Editor",
+  description: "Edit images using Google DeepMind Gemini 2.0",
 };
 export const viewport: Viewport = {

app/page.tsx CHANGED Viewed

@@ -1,49 +1,84 @@
 "use client";
 import { useState } from "react";
-import { FileUpload } from "@/components/FileUpload";
-import { PromptInput } from "@/components/PromptInput";
-import { ResultDisplay } from "@/components/ResultDisplay";
-import { FileIcon, FileText } from "lucide-react";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
 export default function Home() {
-  const [schema, setSchema] = useState<string | null>(null);
-  const [file, setFile] = useState<File | null>(null);
-  const [result, setResult] = useState<string | null>(null);
   const [loading, setLoading] = useState(false);
-  const handleFileSelect = (selectedFile: File) => {
-    setFile(selectedFile);
   };
   const handlePromptSubmit = async (prompt: string) => {
     try {
       setLoading(true);
-      // First, get the JSON schema
-      const schemaResponse = await fetch("/api/schema", {
         method: "POST",
         headers: {
           "Content-Type": "application/json",
         },
-        body: JSON.stringify({ prompt }),
       });
-      const { schema } = await schemaResponse.json();
-      setSchema(schema);
-      // Then, process the PDF with the schema
-      const formData = new FormData();
-      formData.append("file", file!);
-      formData.append("schema", JSON.stringify(schema));
-      const extractResponse = await fetch("/api/extract", {
-        method: "POST",
-        body: formData,
-      });
-      const data = await extractResponse.json();
-      setResult(data);
     } catch (error) {
       console.error("Error processing request:", error);
     } finally {
       setLoading(false);
@@ -51,46 +86,76 @@ export default function Home() {
   };
   const handleReset = () => {
-    setFile(null);
-    setResult(null);
-    setSchema(null);
     setLoading(false);
   };
   return (
     <main className="min-h-screen flex items-center justify-center bg-background p-8">
-      <Card className="w-full max-w-2xl border-0 bg-card shadow-none">
         <CardHeader className="flex flex-col items-center justify-center space-y-2">
           <CardTitle className="flex items-center gap-2 text-foreground">
-            <FileText className="w-8 h-8 text-primary" />
-            PDF to Structured Data
           </CardTitle>
           <span className="text-sm font-mono text-muted-foreground">
             powered by Google DeepMind Gemini 2.0 Flash
           </span>
         </CardHeader>
         <CardContent className="space-y-6 pt-6 w-full">
-          {!result && !loading ? (
             <>
-              <FileUpload onFileSelect={handleFileSelect} />
-              <PromptInput onSubmit={handlePromptSubmit} file={file} />
             </>
           ) : loading ? (
             <div
               role="status"
               className="flex items-center mx-auto justify-center h-56 max-w-sm bg-gray-300 rounded-lg animate-pulse dark:bg-secondary"
             >
-              <FileIcon className="w-10 h-10 text-gray-200 dark:text-muted-foreground" />
               <span className="pl-4 font-mono font-xs text-muted-foreground">
                 Processing...
               </span>
             </div>
           ) : (
-            <ResultDisplay
-              result={result || ""}
-              schema={schema || ""}
-              onReset={handleReset}
-            />
           )}
         </CardContent>
       </Card>

 "use client";
 import { useState } from "react";
+import { ImageUpload } from "@/components/ImageUpload";
+import { ImagePromptInput } from "@/components/ImagePromptInput";
+import { ImageResultDisplay } from "@/components/ImageResultDisplay";
+import { ImageIcon, Wand2 } from "lucide-react";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { HistoryItem } from "@/lib/types";
 export default function Home() {
+  const [image, setImage] = useState<string | null>(null);
+  const [generatedImage, setGeneratedImage] = useState<string | null>(null);
+  const [description, setDescription] = useState<string | null>(null);
   const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const [history, setHistory] = useState<HistoryItem[]>([]);
+  // Store the uploaded image (a string provided by ImageUpload). An empty
+  // string is normalized to null so "no image" has a single representation.
+  const handleImageSelect = (imageData: string) => {
+    setImage(imageData || null);
   };
   const handlePromptSubmit = async (prompt: string) => {
     try {
       setLoading(true);
+      setError(null);
+      // If we have a generated image, use that for editing, otherwise use the uploaded image
+      const imageToEdit = generatedImage || image;
+      // Prepare the request data as JSON
+      const requestData = {
+        prompt,
+        image: imageToEdit,
+        history: history.length > 0 ? history : undefined,
+      };
+      const response = await fetch("/api/image", {
         method: "POST",
         headers: {
           "Content-Type": "application/json",
         },
+        body: JSON.stringify(requestData),
       });
+      if (!response.ok) {
+        const errorData = await response.json();
+        throw new Error(errorData.error || "Failed to generate image");
+      }
+      const data = await response.json();
+      if (data.image) {
+        // Update the generated image and description
+        setGeneratedImage(data.image);
+        setDescription(data.description || null);
+        // Update history locally - add user message
+        const userMessage: HistoryItem = {
+          role: "user",
+          parts: [
+            { text: prompt },
+            ...(imageToEdit ? [{ image: imageToEdit }] : []),
+          ],
+        };
+        // Add AI response
+        const aiResponse: HistoryItem = {
+          role: "model",
+          parts: [
+            ...(data.description ? [{ text: data.description }] : []),
+            ...(data.image ? [{ image: data.image }] : []),
+          ],
+        };
+        // Update history with both messages
+        setHistory((prevHistory) => [...prevHistory, userMessage, aiResponse]);
+      } else {
+        setError("No image returned from API");
+      }
     } catch (error) {
+      setError(error instanceof Error ? error.message : "An error occurred");
       console.error("Error processing request:", error);
     } finally {
       setLoading(false);
   };
+  // Return the page to its initial state: drop the uploaded and generated
+  // images, the description, any error message and the conversation history.
   const handleReset = () => {
+    setImage(null);
+    setGeneratedImage(null);
+    setDescription(null);
     setLoading(false);
+    setError(null);
+    setHistory([]);
   };
+  // If we have a generated image, we want to edit it next time
+  const currentImage = generatedImage || image;
+  const isEditing = !!currentImage;
+  // Get the latest image to display (always the generated image)
+  const displayImage = generatedImage;
   return (
     <main className="min-h-screen flex items-center justify-center bg-background p-8">
+      <Card className="w-full max-w-4xl border-0 bg-card shadow-none">
         <CardHeader className="flex flex-col items-center justify-center space-y-2">
           <CardTitle className="flex items-center gap-2 text-foreground">
+            <Wand2 className="w-8 h-8 text-primary" />
+            Image Creation & Editing
           </CardTitle>
           <span className="text-sm font-mono text-muted-foreground">
             powered by Google DeepMind Gemini 2.0 Flash
           </span>
         </CardHeader>
         <CardContent className="space-y-6 pt-6 w-full">
+          {error && (
+            <div className="p-4 mb-4 text-sm text-red-700 bg-red-100 rounded-lg">
+              {error}
+            </div>
+          )}
+          {!displayImage && !loading ? (
             <>
+              <ImageUpload
+                onImageSelect={handleImageSelect}
+                currentImage={currentImage}
+              />
+              <ImagePromptInput
+                onSubmit={handlePromptSubmit}
+                isEditing={isEditing}
+                isLoading={loading}
+              />
             </>
           ) : loading ? (
             <div
               role="status"
               className="flex items-center mx-auto justify-center h-56 max-w-sm bg-gray-300 rounded-lg animate-pulse dark:bg-secondary"
             >
+              <ImageIcon className="w-10 h-10 text-gray-200 dark:text-muted-foreground" />
               <span className="pl-4 font-mono font-xs text-muted-foreground">
                 Processing...
               </span>
             </div>
           ) : (
+            <>
+              <ImageResultDisplay
+                imageUrl={displayImage || ""}
+                description={description}
+                onReset={handleReset}
+                conversationHistory={history}
+              />
+              <ImagePromptInput
+                onSubmit={handlePromptSubmit}
+                isEditing={true}
+                isLoading={loading}
+              />
+            </>
           )}
         </CardContent>
       </Card>

components/{PromptInput.tsx → ImagePromptInput.tsx} RENAMED Viewed

@@ -3,19 +3,25 @@
 import { useState } from "react";
 import { Button } from "@/components/ui/button";
 import { Wand2 } from "lucide-react";
-import { Textarea } from "@/components/ui/textarea";
-interface PromptInputProps {
   onSubmit: (prompt: string) => void;
-  file: File | null;
 }
-export function PromptInput({ onSubmit, file }: PromptInputProps) {
   const [prompt, setPrompt] = useState("");
-  const handleSubmit = (e: React.FormEvent) => {
-    e.preventDefault();
     if (prompt.trim()) {
       onSubmit(prompt.trim());
     }
   };
@@ -23,26 +29,31 @@ export function PromptInput({ onSubmit, file }: PromptInputProps) {
     <form onSubmit={handleSubmit} className="space-y-4 rounded-lg">
       <div className="space-y-2">
         <p className="text-sm font-medium text-foreground">
-          Describe the structure and type of data you want to extract from the
-          PDF.
         </p>
       </div>
-      <Textarea
         id="prompt"
-        className="min-h-[100px] border-secondary resize-none "
-        placeholder="Example: Extract all invoice details including invoice number, date, items, prices, and total amount..."
         value={prompt}
         onChange={(e) => setPrompt(e.target.value)}
       />
       <Button
         type="submit"
-        disabled={!prompt.trim() || file === null}
         className="w-full bg-primary hover:bg-primary/90"
       >
         <Wand2 className="w-4 h-4 mr-2" />
-        Extract Data
       </Button>
     </form>
   );

 import { useState } from "react";
 import { Button } from "@/components/ui/button";
 import { Wand2 } from "lucide-react";
+import { Input } from "./ui/input";
+interface ImagePromptInputProps {
   onSubmit: (prompt: string) => void;
+  isEditing: boolean;
+  isLoading: boolean;
 }
+export function ImagePromptInput({
+  onSubmit,
+  isEditing,
+  isLoading,
+}: ImagePromptInputProps) {
   const [prompt, setPrompt] = useState("");
+  // Submit the trimmed prompt to the parent and clear the input field.
+  // The native submit event is cancelled first: this handler is bound to the
+  // <form>'s onSubmit, and without preventDefault() the browser performs a
+  // regular form submission, which reloads the page and discards React state.
+  // The event parameter is optional so zero-argument calls keep working.
+  const handleSubmit = (e?: React.FormEvent) => {
+    e?.preventDefault();
+    const trimmedPrompt = prompt.trim();
+    if (trimmedPrompt) {
+      onSubmit(trimmedPrompt);
+      setPrompt("");
     }
   };
     <form onSubmit={handleSubmit} className="space-y-4 rounded-lg">
       <div className="space-y-2">
         <p className="text-sm font-medium text-foreground">
+          {isEditing
+            ? "Describe how you want to edit the image"
+            : "Describe the image you want to generate"}
         </p>
       </div>
+      <Input
         id="prompt"
+        className="border-secondary resize-none"
+        placeholder={
+          isEditing
+            ? "Example: Make the background blue and add a rainbow..."
+            : "Example: A 3D rendered image of a pig with wings and a top hat flying over a futuristic city..."
+        }
         value={prompt}
         onChange={(e) => setPrompt(e.target.value)}
       />
       <Button
         type="submit"
+        disabled={!prompt.trim() || isLoading}
         className="w-full bg-primary hover:bg-primary/90"
       >
         <Wand2 className="w-4 h-4 mr-2" />
+        {isEditing ? "Edit Image" : "Generate Image"}
       </Button>
     </form>
   );

components/ImageResultDisplay.tsx ADDED Viewed

	@@ -0,0 +1,111 @@

+"use client";
+import { Button } from "@/components/ui/button";
+import { Download, RotateCcw, MessageCircle } from "lucide-react";
+import { useState } from "react";
+import { HistoryItem, HistoryPart } from "@/lib/types";
+import Image from "next/image";
+/** Props accepted by ImageResultDisplay. */
+interface ImageResultDisplayProps {
+  /** Image source to render; also used as the href of the download link. */
+  imageUrl: string;
+  /** Text shown in the "Description" panel; the panel is hidden when falsy. */
+  description: string | null;
+  /** Called when the user clicks "Create New Image". */
+  onReset: () => void;
+  /** Conversation turns listed in the history panel; defaults to an empty list. */
+  conversationHistory?: HistoryItem[];
+}
+/**
+ * Renders the generated image with its optional description, plus actions to
+ * download the image, toggle the conversation history and start over.
+ *
+ * @param imageUrl - Image source to render; also used as the download link target.
+ * @param description - Optional text shown below the image; hidden when falsy.
+ * @param onReset - Called when the user clicks "Create New Image".
+ * @param conversationHistory - Conversation turns shown in the history panel;
+ *   the history toggle is only rendered when this list is non-empty.
+ */
+export function ImageResultDisplay({
+  imageUrl,
+  description,
+  onReset,
+  conversationHistory = [],
+}: ImageResultDisplayProps) {
+  const [showHistory, setShowHistory] = useState(false);
+  const handleDownload = () => {
+    // Trigger a download by clicking a temporary anchor that points at the image
+    const link = document.createElement("a");
+    link.href = imageUrl;
+    link.download = `gemini-image-${Date.now()}.png`;
+    document.body.appendChild(link);
+    link.click();
+    document.body.removeChild(link);
+  };
+  const toggleHistory = () => {
+    // Functional update so the toggle never acts on a stale value
+    setShowHistory((visible) => !visible);
+  };
+  return (
+    <div className="space-y-4">
+      <div className="flex items-center justify-between">
+        <h2 className="text-xl font-semibold">Generated Image</h2>
+        <div className="space-x-2">
+          <Button variant="outline" size="sm" onClick={handleDownload}>
+            <Download className="w-4 h-4 mr-2" />
+            Download
+          </Button>
+          {conversationHistory.length > 0 && (
+            <Button variant="outline" size="sm" onClick={toggleHistory}>
+              <MessageCircle className="w-4 h-4 mr-2" />
+              {showHistory ? "Hide History" : "Show History"}
+            </Button>
+          )}
+          <Button variant="outline" size="sm" onClick={onReset}>
+            <RotateCcw className="w-4 h-4 mr-2" />
+            Create New Image
+          </Button>
+        </div>
+      </div>
+      <div className="rounded-lg overflow-hidden bg-muted p-2">
+        {/* next/image requires width and height for string sources. The real
+            dimensions are unknown here, so these are nominal values; the
+            w-auto/h-auto classes let the image keep its own aspect ratio. */}
+        <Image
+          src={imageUrl}
+          alt="Generated"
+          width={640}
+          height={640}
+          className="max-w-[640px] w-auto h-auto mx-auto"
+        />
+      </div>
+      {description && (
+        <div className="p-4 rounded-lg bg-muted">
+          <h3 className="text-sm font-medium mb-2">Description</h3>
+          <p className="text-sm text-muted-foreground">{description}</p>
+        </div>
+      )}
+      {showHistory && conversationHistory.length > 0 && (
+        <div className="p-4 rounded-lg">
+          <h3 className="text-sm font-medium mb-4">Conversation History</h3>
+          <div className="space-y-4">
+            {conversationHistory.map((item, index) => (
+              <div key={index} className={`p-3 rounded-lg bg-secondary`}>
+                <p
+                  className={`text-sm font-medium mb-2 ${
+                    item.role === "user" ? "text-foreground" : "text-primary"
+                  }`}
+                >
+                  {item.role === "user" ? "You" : "Gemini"}
+                </p>
+                <div className="space-y-2">
+                  {item.parts.map((part: HistoryPart, partIndex) => (
+                    <div key={partIndex}>
+                      {part.text && <p className="text-sm">{part.text}</p>}
+                      {part.image && (
+                        <div className="mt-2 overflow-hidden rounded-md">
+                          {/* Nominal dimensions, as above; CSS keeps the ratio */}
+                          <Image
+                            src={part.image}
+                            alt={`${item.role} image`}
+                            width={256}
+                            height={256}
+                            className="max-w-64 w-auto h-auto object-contain"
+                          />
+                        </div>
+                      )}
+                    </div>
+                  ))}
+                </div>
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}

components/{FileUpload.tsx → ImageUpload.tsx} RENAMED Viewed

@@ -1,13 +1,13 @@
 "use client";
-import { useCallback, useState } from "react";
 import { useDropzone } from "react-dropzone";
 import { Button } from "./ui/button";
-import { Upload as UploadIcon, File as FileIcon, X } from "lucide-react";
-import PdfViewer from "./PdfViewer";
-interface FileUploadProps {
-  onFileSelect: (file: File) => void;
 }
 export function formatFileSize(bytes: number): string {
@@ -20,32 +20,58 @@ export function formatFileSize(bytes: number): string {
   );
 }
-export function FileUpload({ onFileSelect }: FileUploadProps) {
   const [selectedFile, setSelectedFile] = useState<File | null>(null);
-  const [file, setFile] = useState<File | null>(null);
   const onDrop = useCallback(
     (acceptedFiles: File[]) => {
       const file = acceptedFiles[0];
       setSelectedFile(file);
-      onFileSelect(file);
-      setFile(file);
     },
-    [onFileSelect]
   );
   const { getRootProps, getInputProps, isDragActive } = useDropzone({
     onDrop,
     accept: {
-      "application/pdf": [".pdf"],
     },
-    maxSize: 100 * 1024 * 1024, // 100MB
     multiple: false,
   });
   return (
-    <div className={`"w-full min-h-[150px] `}>
-      {!selectedFile ? (
         <div
           {...getRootProps()}
           className={`min-h-[150px] p-4 rounded-lg
@@ -60,36 +86,45 @@ export function FileUpload({ onFileSelect }: FileUploadProps) {
             <UploadIcon className="w-8 h-8 text-primary mr-3 flex-shrink-0" />
             <div className="">
               <p className="text-sm font-medium text-foreground">
-                Drop your PDF here or click to browse
               </p>
               <p className="text-xs text-muted-foreground">
-                Maximum file size: 100MB
               </p>
             </div>
           </div>
         </div>
       ) : (
-        <div className="flex my-auto flex-row items-center p-4 rounded-lg bg-secondary">
-          <FileIcon className="w-8 h-8 text-primary mr-3 flex-shrink-0" />
-          <div className="flex-grow min-w-0">
-            <p className="text-sm font-medium truncate text-foreground">
-              {selectedFile?.name}
-            </p>
-            <p className="text-xs text-muted-foreground">
-              {formatFileSize(selectedFile?.size ?? 0)}
-            </p>
           </div>
-          {file && <PdfViewer file={file} />}
-          <Button
-            variant="ghost"
-            size="icon"
-            onClick={() => setSelectedFile(null)}
-            className="flex-shrink-0 ml-2"
-          >
-            <X className="w-4 h-4" />
-            <span className="sr-only">Remove file</span>
-          </Button>
         </div>
       )}
     </div>

 "use client";
+import { useCallback, useState, useEffect } from "react";
 import { useDropzone } from "react-dropzone";
 import { Button } from "./ui/button";
+import { Upload as UploadIcon, Image as ImageIcon, X } from "lucide-react";
+import Image from "next/image";
+interface ImageUploadProps {
+  onImageSelect: (imageData: string) => void;
+  currentImage: string | null;
 }
 export function formatFileSize(bytes: number): string {
   );
 }
+export function ImageUpload({ onImageSelect, currentImage }: ImageUploadProps) {
   const [selectedFile, setSelectedFile] = useState<File | null>(null);
+  // Clear the locally tracked file whenever the parent reports that there is
+  // no current image, so stale file details are not shown for a new upload.
+  useEffect(() => {
+    if (!currentImage) {
+      setSelectedFile(null);
+    }
+  }, [currentImage]);
+  // Dropzone callback: remember the first accepted file and hand its contents
+  // to the parent as a data URL string.
   const onDrop = useCallback(
     (acceptedFiles: File[]) => {
       const file = acceptedFiles[0];
+      // Nothing to do when no file was accepted
+      if (!file) return;
       setSelectedFile(file);
+      // Read the file as a data URL; the result is delivered asynchronously
+      const reader = new FileReader();
+      reader.onload = (event) => {
+        if (event.target && event.target.result) {
+          const result = event.target.result as string;
+          console.log("Image loaded, length:", result.length);
+          onImageSelect(result);
+        }
+      };
+      // Read failures are only logged; the parent is not notified
+      reader.onerror = (error) => {
+        console.error("Error reading file:", error);
+      };
+      reader.readAsDataURL(file);
     },
+    [onImageSelect]
   );
   const { getRootProps, getInputProps, isDragActive } = useDropzone({
     onDrop,
     accept: {
+      "image/png": [".png"],
+      "image/jpeg": [".jpg", ".jpeg"],
     },
+    maxSize: 10 * 1024 * 1024, // 10MB
     multiple: false,
   });
+  // Remove the current selection: forget the local file and report an empty
+  // string to the parent through onImageSelect.
+  const handleRemove = () => {
+    setSelectedFile(null);
+    onImageSelect("");
+  };
   return (
+    <div className="w-full">
+      {!currentImage ? (
         <div
           {...getRootProps()}
           className={`min-h-[150px] p-4 rounded-lg
             <UploadIcon className="w-8 h-8 text-primary mr-3 flex-shrink-0" />
             <div className="">
               <p className="text-sm font-medium text-foreground">
+                Drop your image here or click to browse
               </p>
               <p className="text-xs text-muted-foreground">
+                Maximum file size: 10MB
               </p>
             </div>
           </div>
         </div>
       ) : (
+        <div className="flex flex-col items-center p-4 rounded-lg bg-secondary">
+          <div className="flex w-full items-center mb-4">
+            <ImageIcon className="w-8 h-8 text-primary mr-3 flex-shrink-0" />
+            <div className="flex-grow min-w-0">
+              <p className="text-sm font-medium truncate text-foreground">
+                {selectedFile?.name || "Current Image"}
+              </p>
+              {selectedFile && (
+                <p className="text-xs text-muted-foreground">
+                  {formatFileSize(selectedFile?.size ?? 0)}
+                </p>
+              )}
+            </div>
+            <Button
+              variant="ghost"
+              size="icon"
+              onClick={handleRemove}
+              className="flex-shrink-0 ml-2"
+            >
+              <X className="w-4 h-4" />
+              <span className="sr-only">Remove image</span>
+            </Button>
+          </div>
+          <div className="w-full overflow-hidden rounded-md">
+            <Image
+              src={currentImage}
+              alt="Selected"
+              className="w-full h-auto object-contain"
+            />
           </div>
         </div>
       )}
     </div>

components/PdfViewer.tsx DELETED Viewed

@@ -1,77 +0,0 @@
-"use client";
-import { useCallback, useState } from "react";
-import { pdfjs, Document, Page } from "react-pdf";
-import "react-pdf/dist/esm/Page/AnnotationLayer.css";
-import "react-pdf/dist/esm/Page/TextLayer.css";
-import { useResizeObserver } from "@wojtekmaj/react-hooks";
-import type { PDFDocumentProxy } from "pdfjs-dist";
-import {
-  Sheet,
-  SheetContent,
-  SheetHeader,
-  SheetTitle,
-  SheetTrigger,
-} from "./ui/sheet";
-pdfjs.GlobalWorkerOptions.workerSrc = new URL(
-  "pdfjs-dist/build/pdf.worker.min.mjs",
-  import.meta.url
-).toString();
-const options = {
-  cMapUrl: "/cmaps/",
-  standardFontDataUrl: "/standard_fonts/",
-};
-export default function PdfViewer({ file }: { file: File }) {
-  const [numPages, setNumPages] = useState<number>();
-  const [containerRef, setContainerRef] = useState<HTMLElement | null>(null);
-  const [containerWidth, setContainerWidth] = useState<number>();
-  // Add resize observer
-  const onResize = useCallback<ResizeObserverCallback>((entries) => {
-    const [entry] = entries;
-    if (entry) {
-      setContainerWidth(entry.contentRect.width);
-    }
-  }, []);
-  useResizeObserver(containerRef, {}, onResize);
-  async function onDocumentLoadSuccess(page: PDFDocumentProxy): Promise<void> {
-    setNumPages(page._pdfInfo.numPages);
-  }
-  return (
-    <Sheet>
-      <SheetTrigger className="h-10 rounded-lg px-4 py-2 border-input bg-background border-2 hover:bg-accent hover:text-accent-foreground">
-        Preview
-      </SheetTrigger>
-      <SheetContent side="bottom">
-        <SheetHeader>
-          <SheetTitle>{file.name}</SheetTitle>
-        </SheetHeader>
-        <div
-          ref={setContainerRef}
-          className="max-w-2xl mx-auto mt-2 max-h-[calc(100vh-10rem)] overflow-y-auto"
-        >
-          <Document
-            file={file}
-            onLoadSuccess={onDocumentLoadSuccess}
-            options={options}
-          >
-            {Array.from(new Array(numPages), (_el, index) => (
-              <Page
-                key={`page_${index + 1}`}
-                pageNumber={index + 1}
-                width={containerWidth}
-              />
-            ))}
-          </Document>
-        </div>
-      </SheetContent>
-    </Sheet>
-  );
-}

components/ResultDisplay.tsx DELETED Viewed

@@ -1,78 +0,0 @@
-"use client";
-import { Button } from "@/components/ui/button";
-import { Braces, Copy, RotateCcw } from "lucide-react";
-import { useState } from "react";
-import {
-  Popover,
-  PopoverContent,
-  PopoverTrigger,
-} from "@/components/ui/popover";
-interface ResultDisplayProps {
-  result: string;
-  schema: string;
-  onReset: () => void;
-}
-export function ResultDisplay({ result, schema, onReset }: ResultDisplayProps) {
-  const [copied, setCopied] = useState(false);
-  const [schemaCopied, setSchemaCopied] = useState(false);
-  const handleCopy = () => {
-    navigator.clipboard.writeText(JSON.stringify(result, null, 2));
-    setCopied(true);
-    setTimeout(() => setCopied(false), 2000);
-  };
-  const handleSchemaCopy = () => {
-    navigator.clipboard.writeText(JSON.stringify(schema, null, 2));
-    setSchemaCopied(true);
-    setTimeout(() => setSchemaCopied(false), 2000);
-  };
-  return (
-    <div className="space-y-4">
-      <div className="flex items-center justify-between">
-        <h2 className="text-xl font-semibold">Extracted Data</h2>
-        <div className="space-x-2">
-          <Popover>
-            <PopoverTrigger>
-              <Button variant="outline" size="sm">
-                <Braces className="w-4 h-4 mr-2" />
-                Schema
-              </Button>
-            </PopoverTrigger>
-            <PopoverContent className="max-h-[500px] max-w-[700px] w-full overflow-y-auto">
-              <div className="relative p-4 rounded-lg bg-muted">
-                <Button
-                  variant="secondary"
-                  size="sm"
-                  onClick={handleSchemaCopy}
-                  className="absolute top-2 right-2"
-                >
-                  <Copy className="w-4 h-4 mr-2" />
-                  {schemaCopied ? "Copied!" : "Copy"}
-                </Button>
-                <pre className="overflow-auto">
-                  <code className="text-xs">
-                    {JSON.stringify(schema, null, 2)}
-                  </code>
-                </pre>
-              </div>
-            </PopoverContent>
-          </Popover>
-          <Button variant="outline" size="sm" onClick={handleCopy}>
-            <Copy className="w-4 h-4 mr-2" />
-            {copied ? "Copied!" : "Copy"}
-          </Button>
-          <Button variant="outline" size="sm" onClick={onReset}>
-            <RotateCcw className="w-4 h-4 mr-2" />
-            Process Another PDF
-          </Button>
-        </div>
-      </div>
-      <pre className="p-4 rounded-lg bg-muted overflow-auto">
-        <code className="text-sm">{JSON.stringify(result, null, 2)}</code>
-      </pre>
-    </div>
-  );
-}

lib/types.ts ADDED Viewed

	@@ -0,0 +1,21 @@

+/** One turn of the conversation history kept by the app. */
+export interface HistoryItem {
+  /** Author of the turn: "user" for the person, "model" for the AI response. */
+  role: "user" | "model";
+  /** Ordered content of the turn; each part may carry text and/or an image. */
+  parts: HistoryPart[];
+}
+/** A single piece of content inside a HistoryItem. */
+export interface HistoryPart {
+  /** Text content (optional). */
+  text?: string;
+  /**
+   * Image content as a data URL (optional).
+   * Format: data:image/png;base64,... or data:image/jpeg;base64,...
+   */
+  image?: string;
+}
+// Note: When sending to the Gemini API:
+// 1. User messages can contain both text and images (as inlineData)
+// 2. Model messages should only contain text parts
+// 3. Images in history are stored as data URLs in our app, but converted to base64 for the API

next.config.ts CHANGED Viewed

@@ -11,4 +11,4 @@ const nextConfig: NextConfig = {
   },
 };
-export default nextConfig;

   },
 };
+export default nextConfig;

package-lock.json CHANGED Viewed

@@ -20,7 +20,7 @@
                 "next-themes": "^0.4.4",
                 "react": "^19.0.0",
                 "react-dom": "^19.0.0",
-                "react-dropzone": "^14.3.5",
                 "react-pdf": "^9.2.1",
                 "tailwind-merge": "^3.0.1",
                 "tailwindcss-animate": "^1.0.7"
@@ -6255,9 +6255,9 @@
             }
         },
         "node_modules/react-dropzone": {
-            "version": "14.3.5",
-            "resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.5.tgz",
-            "integrity": "sha512-9nDUaEEpqZLOz5v5SUcFA0CjM4vq8YbqO0WRls+EYT7+DvxUdzDPKNCPLqGfj3YL9MsniCLCD4RFA6M95V6KMQ==",
             "license": "MIT",
             "dependencies": {
                 "attr-accept": "^2.2.4",

                 "next-themes": "^0.4.4",
                 "react": "^19.0.0",
                 "react-dom": "^19.0.0",
+                "react-dropzone": "^14.3.8",
                 "react-pdf": "^9.2.1",
                 "tailwind-merge": "^3.0.1",
                 "tailwindcss-animate": "^1.0.7"
             }
         },
         "node_modules/react-dropzone": {
+            "version": "14.3.8",
+            "resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.8.tgz",
+            "integrity": "sha512-sBgODnq+lcA4P296DY4wacOZz3JFpD99fp+hb//iBO2HHnyeZU3FwWyXJ6salNpqQdsZrgMrotuko/BdJMV8Ug==",
             "license": "MIT",
             "dependencies": {
                 "attr-accept": "^2.2.4",

package.json CHANGED Viewed

@@ -21,8 +21,7 @@
         "next-themes": "^0.4.4",
         "react": "^19.0.0",
         "react-dom": "^19.0.0",
-        "react-dropzone": "^14.3.5",
-        "react-pdf": "^9.2.1",
         "tailwind-merge": "^3.0.1",
         "tailwindcss-animate": "^1.0.7"
     },

         "next-themes": "^0.4.4",
         "react": "^19.0.0",
         "react-dom": "^19.0.0",
+        "react-dropzone": "^14.3.8",
         "tailwind-merge": "^3.0.1",
         "tailwindcss-animate": "^1.0.7"
     },

tsconfig.json CHANGED Viewed

@@ -4,7 +4,7 @@
     "lib": ["dom", "dom.iterable", "esnext"],
     "allowJs": true,
     "skipLibCheck": true,
-    "strict": true,
     "noEmit": true,
     "esModuleInterop": true,
     "module": "esnext",

     "lib": ["dom", "dom.iterable", "esnext"],
     "allowJs": true,
     "skipLibCheck": true,
+    "strict": false,
     "noEmit": true,
     "esModuleInterop": true,
     "module": "esnext",