sitammeur committed on
Commit
ac6bfbb
·
verified ·
1 Parent(s): 9617b21

Upload 13 files

Browse files
public/gemma-logo.png ADDED
public/vite.svg ADDED
src/App.jsx ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState, useRef } from "react";
2
+
3
+ import Chat from "./components/Chat";
4
+ import ArrowRightIcon from "./components/icons/ArrowRightIcon";
5
+ import StopIcon from "./components/icons/StopIcon";
6
+ import Progress from "./components/Progress";
7
+
8
// WebGPU feature flag: `navigator.gpu` only exists in browsers with WebGPU support.
const IS_WEBGPU_AVAILABLE = !!navigator.gpu;
// While streaming, keep the chat pinned to the bottom only if the user is
// already scrolled within this many pixels of the bottom.
const STICKY_SCROLL_THRESHOLD = 120;
// Example prompts offered before the first user message is sent.
const EXAMPLES = [
  "Give me tips to improve my time management skills.",
  "What is the difference between AI and ML?",
  "What is the Fibonacci sequence?",
];
15
+
16
+ /**
17
+ * The main application component that handles the interaction with a Web Worker
18
+ * to load and run a language model directly in the browser using WebGPU.
19
+ */
20
+ function App() {
21
+ // Create a reference to the worker object.
22
+ const worker = useRef(null);
23
+
24
+ const textareaRef = useRef(null);
25
+ const chatContainerRef = useRef(null);
26
+
27
+ // Model loading and progress
28
+ const [status, setStatus] = useState(null);
29
+ const [error, setError] = useState(null);
30
+ const [loadingMessage, setLoadingMessage] = useState("");
31
+ const [progressItems, setProgressItems] = useState([]);
32
+ const [isRunning, setIsRunning] = useState(false);
33
+
34
+ // Inputs and outputs
35
+ const [input, setInput] = useState("");
36
+ const [messages, setMessages] = useState([]);
37
+ const [systemPrompt, setSystemPrompt] = useState(
38
+ "You are a helpful assistant."
39
+ );
40
+ const [showSystemPromptInput, setShowSystemPromptInput] = useState(false);
41
+ const [tps, setTps] = useState(null);
42
+ const [numTokens, setNumTokens] = useState(null);
43
+
44
+ function onEnter(message) {
45
+ setMessages((prev) => [...prev, { role: "user", content: message }]);
46
+ setTps(null);
47
+ setIsRunning(true);
48
+ setInput("");
49
+ }
50
+
51
+ function onInterrupt() {
52
+ // NOTE: We do not set isRunning to false here because the worker
53
+ // will send a 'complete' message when it is done.
54
+ worker.current.postMessage({ type: "interrupt" });
55
+ }
56
+
57
+ useEffect(() => {
58
+ resizeInput();
59
+ }, [input]);
60
+
61
+ function resizeInput() {
62
+ if (!textareaRef.current) {
63
+ return;
64
+ }
65
+
66
+ const target = textareaRef.current;
67
+ target.style.height = "auto";
68
+ const newHeight = Math.min(Math.max(target.scrollHeight, 24), 200);
69
+ target.style.height = `${newHeight}px`;
70
+ }
71
+
72
+ // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
73
+ useEffect(() => {
74
+ // Create the worker if it does not yet exist.
75
+ if (!worker.current) {
76
+ worker.current = new Worker(new URL("./worker.js", import.meta.url), {
77
+ type: "module",
78
+ });
79
+ worker.current.postMessage({ type: "check" }); // Do a feature check
80
+ }
81
+
82
+ // Create a callback function for messages from the worker thread.
83
+ const onMessageReceived = (e) => {
84
+ switch (e.data.status) {
85
+ case "loading":
86
+ // Model file start load: add a new progress item to the list.
87
+ setStatus("loading");
88
+ setLoadingMessage(e.data.data);
89
+ break;
90
+
91
+ case "initiate":
92
+ setProgressItems((prev) => [...prev, e.data]);
93
+ break;
94
+
95
+ case "progress":
96
+ // Model file progress: update one of the progress items.
97
+ setProgressItems((prev) =>
98
+ prev.map((item) => {
99
+ if (item.file === e.data.file) {
100
+ return { ...item, ...e.data };
101
+ }
102
+ return item;
103
+ })
104
+ );
105
+ break;
106
+
107
+ case "done":
108
+ // Model file loaded: remove the progress item from the list.
109
+ setProgressItems((prev) =>
110
+ prev.filter((item) => item.file !== e.data.file)
111
+ );
112
+ break;
113
+
114
+ case "ready":
115
+ // Pipeline ready: the worker is ready to accept messages.
116
+ setStatus("ready");
117
+ break;
118
+
119
+ case "start":
120
+ {
121
+ // Start generation
122
+ setMessages((prev) => [
123
+ ...prev,
124
+ { role: "assistant", content: "" },
125
+ ]);
126
+ }
127
+ break;
128
+
129
+ case "update":
130
+ {
131
+ // Generation update: update the output text.
132
+ // Parse messages
133
+ const { output, tps, numTokens } = e.data;
134
+ setTps(tps);
135
+ setNumTokens(numTokens);
136
+ setMessages((prev) => {
137
+ const cloned = [...prev];
138
+ const last = cloned.at(-1);
139
+ cloned[cloned.length - 1] = {
140
+ ...last,
141
+ content: last.content + output,
142
+ };
143
+ return cloned;
144
+ });
145
+ }
146
+ break;
147
+
148
+ case "complete":
149
+ // Generation complete: re-enable the "Generate" button
150
+ setIsRunning(false);
151
+ break;
152
+
153
+ case "error":
154
+ setError(e.data.data);
155
+ break;
156
+ }
157
+ };
158
+
159
+ const onErrorReceived = (e) => {
160
+ console.error("Worker error:", e);
161
+ };
162
+
163
+ // Attach the callback function as an event listener.
164
+ worker.current.addEventListener("message", onMessageReceived);
165
+ worker.current.addEventListener("error", onErrorReceived);
166
+
167
+ // Define a cleanup function for when the component is unmounted.
168
+ return () => {
169
+ worker.current.removeEventListener("message", onMessageReceived);
170
+ worker.current.removeEventListener("error", onErrorReceived);
171
+ };
172
+ }, []);
173
+
174
+ // Send the messages to the worker thread whenever the `messages` state changes.
175
+ useEffect(() => {
176
+ if (messages.filter((x) => x.role === "user").length === 0) {
177
+ // No user messages yet: do nothing.
178
+ return;
179
+ }
180
+ if (messages.at(-1).role === "assistant") {
181
+ // Do not update if the last message is from the assistant
182
+ return;
183
+ }
184
+ setTps(null);
185
+
186
+ // Include system prompt as the first message
187
+ const messagesWithSystem = [
188
+ { role: "system", content: systemPrompt },
189
+ ...messages,
190
+ ];
191
+
192
+ worker.current.postMessage({ type: "generate", data: messagesWithSystem });
193
+ }, [messages, isRunning, systemPrompt]);
194
+
195
+ useEffect(() => {
196
+ if (!chatContainerRef.current || !isRunning) {
197
+ return;
198
+ }
199
+ const element = chatContainerRef.current;
200
+ if (
201
+ element.scrollHeight - element.scrollTop - element.clientHeight <
202
+ STICKY_SCROLL_THRESHOLD
203
+ ) {
204
+ element.scrollTop = element.scrollHeight;
205
+ }
206
+ }, [messages, isRunning]);
207
+
208
+ return IS_WEBGPU_AVAILABLE ? (
209
+ <div className="flex flex-col h-screen mx-auto items justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
210
+ {status === null && messages.length === 0 && (
211
+ <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
212
+ <div className="flex flex-col items-center mb-1 max-w-[340px] text-center">
213
+ <img
214
+ src="gemma-logo.png"
215
+ width="80%"
216
+ height="auto"
217
+ className="block drop-shadow-lg bg-transparent"
218
+ />
219
+ <h1 className="text-4xl font-bold mb-1">Gemma 3 Web 🌐</h1>
220
+ <h2 className="font-semibold">
221
+ A LLM that runs directly in your browser. 🚀
222
+ </h2>
223
+ </div>
224
+
225
+ <div className="flex flex-col items-center px-4">
226
+ <p className="max-w-[514px] mb-4">
227
+ <br />
228
+ You are about to load{" "}
229
+ <a
230
+ href="https://huggingface.co/onnx-community/gemma-3-270m-it-ONNX"
231
+ target="_blank"
232
+ rel="noreferrer"
233
+ className="font-medium underline"
234
+ >
235
+ gemma-3-270m-it-ONNX
236
+ </a>
237
+ , a 270 million parameter LLM that is optimized for inference on
238
+ the web. Once downloaded, the model will be cached and reused when
239
+ you revisit the page.
240
+ <br />
241
+ <br />
242
+ Everything runs directly in your browser using{" "}
243
+ <a
244
+ href="https://huggingface.co/docs/transformers.js"
245
+ target="_blank"
246
+ rel="noreferrer"
247
+ className="underline"
248
+ >
249
+ 🤗&nbsp;Transformers.js
250
+ </a>{" "}
251
+ and ONNX Runtime Web, meaning your conversations are not sent to a
252
+ server. You can even disconnect from the internet after the model
253
+ has loaded!
254
+ <br />
255
+ Want to learn more? Check out the source code on{" "}
256
+ <a
257
+ href="https://github.com/google-gemini/gemma-cookbook/tree/main/Demos/Gemma3-on-Web"
258
+ target="_blank"
259
+ rel="noreferrer"
260
+ className="underline"
261
+ >
262
+ GitHub
263
+ </a>
264
+ !
265
+ </p>
266
+
267
+ {error && (
268
+ <div className="text-red-500 text-center mb-2">
269
+ <p className="mb-1">
270
+ Unable to load model due to the following error:
271
+ </p>
272
+ <p className="text-sm">{error}</p>
273
+ </div>
274
+ )}
275
+
276
+ <button
277
+ className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
278
+ onClick={() => {
279
+ worker.current.postMessage({ type: "load" });
280
+ setStatus("loading");
281
+ }}
282
+ disabled={status !== null || error !== null}
283
+ >
284
+ Load model
285
+ </button>
286
+ </div>
287
+ </div>
288
+ )}
289
+ {status === "loading" && (
290
+ <>
291
+ <div className="w-full max-w-[500px] text-left mx-auto p-4 bottom-0 mt-auto">
292
+ <p className="text-center mb-1">{loadingMessage}</p>
293
+ {progressItems.map(({ file, progress, total }, i) => (
294
+ <Progress
295
+ key={i}
296
+ text={file}
297
+ percentage={progress}
298
+ total={total}
299
+ />
300
+ ))}
301
+ </div>
302
+ </>
303
+ )}
304
+
305
+ {status === "ready" && (
306
+ <div
307
+ ref={chatContainerRef}
308
+ className="overflow-y-auto scrollbar-thin w-full flex flex-col items-center h-full"
309
+ >
310
+ {/* System Prompt Configuration */}
311
+ {messages.length === 0 && (
312
+ <div className="w-full max-w-[600px] p-4 mb-4">
313
+ <div className="bg-yellow-50 dark:bg-yellow-900/20 border border-yellow-200 dark:border-yellow-800 rounded-lg p-4">
314
+ <div className="flex items-center justify-between mb-2">
315
+ <h3 className="text-sm font-medium text-yellow-800 dark:text-yellow-200">
316
+ System Prompt
317
+ </h3>
318
+ <button
319
+ onClick={() =>
320
+ setShowSystemPromptInput(!showSystemPromptInput)
321
+ }
322
+ className="text-sm text-yellow-700 dark:text-yellow-300 hover:text-yellow-900 dark:hover:text-yellow-100 underline"
323
+ >
324
+ {showSystemPromptInput ? "Hide" : "Edit"}
325
+ </button>
326
+ </div>
327
+ {showSystemPromptInput ? (
328
+ <div className="space-y-2">
329
+ <textarea
330
+ value={systemPrompt}
331
+ onChange={(e) => setSystemPrompt(e.target.value)}
332
+ className="w-full px-3 py-2 text-sm border border-yellow-300 dark:border-yellow-700 rounded-md bg-white dark:bg-gray-800 text-gray-900 dark:text-gray-100 placeholder-gray-500 dark:placeholder-gray-400 resize-none"
333
+ placeholder="Enter your system prompt..."
334
+ rows={3}
335
+ />
336
+ <div className="flex gap-2">
337
+ <button
338
+ onClick={() => setShowSystemPromptInput(false)}
339
+ className="px-3 py-1 text-sm bg-yellow-600 hover:bg-yellow-700 text-white rounded-md"
340
+ >
341
+ Save
342
+ </button>
343
+ <button
344
+ onClick={() => {
345
+ setSystemPrompt("You are a helpful assistant.");
346
+ setShowSystemPromptInput(false);
347
+ }}
348
+ className="px-3 py-1 text-sm bg-gray-500 hover:bg-gray-600 text-white rounded-md"
349
+ >
350
+ Reset to Default
351
+ </button>
352
+ </div>
353
+ </div>
354
+ ) : (
355
+ <p className="text-sm text-yellow-700 dark:text-yellow-300 italic">
356
+ &ldquo;{systemPrompt}&rdquo;
357
+ </p>
358
+ )}
359
+ </div>
360
+ </div>
361
+ )}
362
+
363
+ <Chat messages={messages} />
364
+ {messages.length === 0 && (
365
+ <div>
366
+ {EXAMPLES.map((msg, i) => (
367
+ <div
368
+ key={i}
369
+ className="m-1 border dark:border-gray-600 rounded-md p-2 bg-gray-100 dark:bg-gray-700 cursor-pointer"
370
+ onClick={() => onEnter(msg)}
371
+ >
372
+ {msg}
373
+ </div>
374
+ ))}
375
+ </div>
376
+ )}
377
+ <p className="text-center text-sm min-h-6 text-gray-500 dark:text-gray-300">
378
+ {tps && messages.length > 0 && (
379
+ <>
380
+ {!isRunning && (
381
+ <span>
382
+ Generated {numTokens} tokens in{" "}
383
+ {(numTokens / tps).toFixed(2)} seconds&nbsp;&#40;
384
+ </span>
385
+ )}
386
+ {
387
+ <>
388
+ <span className="font-medium text-center mr-1 text-black dark:text-white">
389
+ {tps.toFixed(2)}
390
+ </span>
391
+ <span className="text-gray-500 dark:text-gray-300">
392
+ tokens/second
393
+ </span>
394
+ </>
395
+ }
396
+ {!isRunning && (
397
+ <>
398
+ <span className="mr-1">&#41;.</span>
399
+ <span
400
+ className="underline cursor-pointer"
401
+ onClick={() => {
402
+ worker.current.postMessage({ type: "reset" });
403
+ setMessages([]);
404
+ }}
405
+ >
406
+ Reset
407
+ </span>
408
+ </>
409
+ )}
410
+ </>
411
+ )}
412
+ </p>
413
+ </div>
414
+ )}
415
+
416
+ <div className="mt-2 border dark:bg-gray-700 rounded-lg w-[600px] max-w-[80%] max-h-[200px] mx-auto relative mb-3 flex">
417
+ <textarea
418
+ ref={textareaRef}
419
+ className="scrollbar-thin w-[550px] dark:bg-gray-700 px-3 py-4 rounded-lg bg-transparent border-none outline-none text-gray-800 disabled:text-gray-400 dark:text-gray-200 placeholder-gray-500 dark:placeholder-gray-400 disabled:placeholder-gray-200 resize-none disabled:cursor-not-allowed"
420
+ placeholder="Type your message..."
421
+ type="text"
422
+ rows={1}
423
+ value={input}
424
+ disabled={status !== "ready"}
425
+ title={status === "ready" ? "Model is ready" : "Model not loaded yet"}
426
+ onKeyDown={(e) => {
427
+ if (
428
+ input.length > 0 &&
429
+ !isRunning &&
430
+ e.key === "Enter" &&
431
+ !e.shiftKey
432
+ ) {
433
+ e.preventDefault(); // Prevent default behavior of Enter key
434
+ onEnter(input);
435
+ }
436
+ }}
437
+ onInput={(e) => setInput(e.target.value)}
438
+ />
439
+ {isRunning ? (
440
+ <div className="cursor-pointer" onClick={onInterrupt}>
441
+ <StopIcon className="h-8 w-8 p-1 rounded-md text-gray-800 dark:text-gray-100 absolute right-3 bottom-3" />
442
+ </div>
443
+ ) : input.length > 0 ? (
444
+ <div className="cursor-pointer" onClick={() => onEnter(input)}>
445
+ <ArrowRightIcon
446
+ className={`h-8 w-8 p-1 bg-gray-800 dark:bg-gray-100 text-white dark:text-black rounded-md absolute right-3 bottom-3`}
447
+ />
448
+ </div>
449
+ ) : (
450
+ <div>
451
+ <ArrowRightIcon
452
+ className={`h-8 w-8 p-1 bg-gray-200 dark:bg-gray-600 text-gray-50 dark:text-gray-800 rounded-md absolute right-3 bottom-3`}
453
+ />
454
+ </div>
455
+ )}
456
+ </div>
457
+
458
+ <p className="text-xs text-gray-400 text-center mb-3">
459
+ Disclaimer: Generated content may be inaccurate or false.
460
+ </p>
461
+ </div>
462
+ ) : (
463
+ <div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">
464
+ WebGPU is not supported
465
+ <br />
466
+ by this browser :&#40;
467
+ </div>
468
+ );
469
+ }
470
+
471
+ export default App;
src/components/Chat.jsx ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { marked } from "marked";
2
+ import DOMPurify from "dompurify";
3
+ import { useEffect } from "react";
4
+
5
+ import BotIcon from "./icons/BotIcon";
6
+ import UserIcon from "./icons/UserIcon";
7
+ import "../styles/Chat.css";
8
+
9
/**
 * Convert Markdown text to HTML and sanitize it before it is injected
 * into the DOM via dangerouslySetInnerHTML.
 */
function render(text) {
  const html = marked.parse(text);
  return DOMPurify.sanitize(html);
}
12
+
13
+ /**
14
+ * Chat component renders a chat interface with messages.
15
+ */
16
+ export default function Chat({ messages }) {
17
+ const empty = messages.length === 0;
18
+
19
+ useEffect(() => {
20
+ window.MathJax.typeset();
21
+ }, [messages]);
22
+
23
+ return (
24
+ <div
25
+ className={`flex-1 p-6 max-w-[960px] w-full ${
26
+ empty ? "flex flex-col items-center justify-end" : "space-y-4"
27
+ }`}
28
+ >
29
+ {empty ? (
30
+ <div className="text-xl">
31
+ <span className="text-gray-500 dark:text-gray-300">
32
+ Hi there! How can I assist you today? 😊
33
+ </span>
34
+ </div>
35
+ ) : (
36
+ messages.map((msg, i) => (
37
+ <div key={`message-${i}`} className="flex items-start space-x-4">
38
+ {msg.role === "assistant" ? (
39
+ <>
40
+ <BotIcon className="h-6 w-6 min-h-6 min-w-6 my-3 text-gray-500 dark:text-gray-300" />
41
+ <div className="bg-gray-200 dark:bg-gray-700 rounded-lg p-4">
42
+ <p className="min-h-6 text-gray-800 dark:text-gray-200 overflow-wrap-anywhere">
43
+ {msg.content.length > 0 ? (
44
+ <span
45
+ className="markdown"
46
+ dangerouslySetInnerHTML={{
47
+ __html: render(msg.content),
48
+ }}
49
+ />
50
+ ) : (
51
+ <span className="h-6 flex items-center gap-1">
52
+ <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse"></span>
53
+ <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse animation-delay-200"></span>
54
+ <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse animation-delay-400"></span>
55
+ </span>
56
+ )}
57
+ </p>
58
+ </div>
59
+ </>
60
+ ) : (
61
+ <>
62
+ <UserIcon className="h-6 w-6 min-h-6 min-w-6 my-3 text-gray-500 dark:text-gray-300" />
63
+ <div className="bg-blue-500 text-white rounded-lg p-4">
64
+ <p className="min-h-6 overflow-wrap-anywhere">
65
+ {msg.content}
66
+ </p>
67
+ </div>
68
+ </>
69
+ )}
70
+ </div>
71
+ ))
72
+ )}
73
+ </div>
74
+ );
75
+ }
src/components/Progress.jsx ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Format a byte count as a short human-readable string, e.g. 1536 -> "1.5kB".
 *
 * @param {number} size - Byte count (expected non-negative).
 * @returns {string} Value rounded to at most 2 decimals plus a unit suffix.
 */
function formatBytes(size) {
  const units = ["B", "kB", "MB", "GB", "TB"];
  // Clamp the unit index: Math.log is invalid for size <= 0 (and negative for
  // fractional sizes, which previously produced `units[-1]` -> "undefined"),
  // and sizes beyond TB would index past the unit table.
  const i =
    size > 0
      ? Math.min(
          Math.max(Math.floor(Math.log(size) / Math.log(1024)), 0),
          units.length - 1
        )
      : 0;
  // Unary `+` drops trailing zeros from toFixed ("1.00" -> 1).
  return +(size / 1024 ** i).toFixed(2) + units[i];
}
8
+
9
+ /**
10
+ * Progress component to display a progress bar with text and percentage.
11
+ */
12
+ export default function Progress({ text, percentage, total }) {
13
+ percentage ??= 0;
14
+ return (
15
+ <div className="w-full bg-gray-100 dark:bg-gray-700 text-left rounded-lg overflow-hidden mb-0.5">
16
+ <div
17
+ className="bg-blue-400 whitespace-nowrap px-1 text-sm"
18
+ style={{ width: `${percentage}%` }}
19
+ >
20
+ {text} ({percentage.toFixed(2)}%
21
+ {isNaN(total) ? "" : ` of ${formatBytes(total)}`})
22
+ </div>
23
+ </div>
24
+ );
25
+ }
src/components/icons/ArrowRightIcon.jsx ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * ArrowRightIcon component renders an SVG icon of a right arrow.
3
+ */
4
+ export default function ArrowRightIcon(props) {
5
+ return (
6
+ <svg
7
+ {...props}
8
+ xmlns="http://www.w3.org/2000/svg"
9
+ width="24"
10
+ height="24"
11
+ viewBox="0 0 24 24"
12
+ fill="none"
13
+ stroke="currentColor"
14
+ strokeWidth="2"
15
+ strokeLinecap="round"
16
+ strokeLinejoin="round"
17
+ >
18
+ <path d="M5 12h14" />
19
+ <path d="m12 5 7 7-7 7" />
20
+ </svg>
21
+ );
22
+ }
src/components/icons/BotIcon.jsx ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * BotIcon component renders an SVG icon representing a bot.
3
+ */
4
+ export default function BotIcon(props) {
5
+ return (
6
+ <svg
7
+ {...props}
8
+ xmlns="http://www.w3.org/2000/svg"
9
+ width="24"
10
+ height="24"
11
+ viewBox="0 0 24 24"
12
+ fill="none"
13
+ stroke="currentColor"
14
+ strokeWidth="2"
15
+ strokeLinecap="round"
16
+ strokeLinejoin="round"
17
+ >
18
+ <path d="M12 8V4H8" />
19
+ <rect width="16" height="12" x="4" y="8" rx="2" />
20
+ <path d="M2 14h2" />
21
+ <path d="M20 14h2" />
22
+ <path d="M15 13v2" />
23
+ <path d="M9 13v2" />
24
+ </svg>
25
+ );
26
+ }
src/components/icons/StopIcon.jsx ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * A React component that renders a stop icon using SVG.
3
+ */
4
+ export default function StopIcon(props) {
5
+ return (
6
+ <svg
7
+ {...props}
8
+ xmlns="http://www.w3.org/2000/svg"
9
+ width="24"
10
+ height="24"
11
+ viewBox="0 0 24 24"
12
+ fill="none"
13
+ stroke="currentColor"
14
+ strokeWidth="2"
15
+ strokeLinecap="round"
16
+ strokeLinejoin="round"
17
+ >
18
+ <path d="M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />
19
+ <path
20
+ fill="currentColor"
21
+ d="M9 9.563C9 9.252 9.252 9 9.563 9h4.874c.311 0 .563.252.563.563v4.874c0 .311-.252.563-.563.563H9.564A.562.562 0 0 1 9 14.437V9.564Z"
22
+ />
23
+ </svg>
24
+ );
25
+ }
src/components/icons/UserIcon.jsx ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * UserIcon component renders a user icon using SVG.
3
+ */
4
+ export default function UserIcon(props) {
5
+ return (
6
+ <svg
7
+ {...props}
8
+ xmlns="http://www.w3.org/2000/svg"
9
+ width="24"
10
+ height="24"
11
+ viewBox="0 0 24 24"
12
+ fill="none"
13
+ stroke="currentColor"
14
+ strokeWidth="2"
15
+ strokeLinecap="round"
16
+ strokeLinejoin="round"
17
+ >
18
+ <path d="M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2" />
19
+ <circle cx="12" cy="7" r="4" />
20
+ </svg>
21
+ );
22
+ }
src/main.jsx ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Entry point of the React application.
3
+ *
4
+ * This file sets up the root of the React application and renders the main App component
5
+ * within a StrictMode wrapper for highlighting potential problems in the application.
6
+ */
7
+ import { StrictMode } from "react";
8
+ import { createRoot } from "react-dom/client";
9
+ import "./styles/index.css";
10
+ import App from "./App.jsx";
11
+
12
+ createRoot(document.getElementById("root")).render(
13
+ <StrictMode>
14
+ <App />
15
+ </StrictMode>
16
+ );
src/styles/Chat.css ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Styling for rendered Markdown inside chat bubbles, scoped to .markdown. */
@scope (.markdown) {
  /* Code blocks */
  pre {
    margin: 0.5rem 0;
    white-space: break-spaces;
  }

  code {
    padding: 0.2em 0.4em;
    border-radius: 4px;
    font-family: Consolas, Monaco, "Andale Mono", "Ubuntu Mono", monospace;
    font-size: 0.9em;
  }

  pre,
  code {
    background-color: #f2f2f2;
  }

  @media (prefers-color-scheme: dark) {
    pre,
    code {
      background-color: #333;
    }
  }

  pre:has(code) {
    padding: 1rem 0.5rem;
  }

  /* Inline padding belongs to <pre>, not the nested <code>. */
  pre > code {
    padding: 0;
  }

  /* Headings */
  h1,
  h2,
  h3,
  h4,
  h5,
  h6 {
    font-weight: 600;
    line-height: 1.2;
  }

  h1 {
    font-size: 2em;
    margin: 1rem 0;
  }

  h2 {
    font-size: 1.5em;
    margin: 0.83rem 0;
  }

  h3 {
    font-size: 1.25em;
    margin: 0.67rem 0;
  }

  h4 {
    font-size: 1em;
    margin: 0.5rem 0;
  }

  h5 {
    font-size: 0.875em;
    margin: 0.33rem 0;
  }

  h6 {
    font-size: 0.75em;
    margin: 0.25rem 0;
  }

  /* Remove the top margin only when a heading is the first element.
     Fixed: the previous selector list `h1, h2, ..., h6:first-child`
     applied :first-child to h6 alone, zeroing the top margin of every
     h1-h5 unconditionally. */
  :is(h1, h2, h3, h4, h5, h6):first-child {
    margin-top: 0;
  }

  /* Unordered List */
  ul {
    list-style-type: disc;
    margin-left: 1.5rem;
  }

  /* Ordered List */
  ol {
    list-style-type: decimal;
    margin-left: 1.5rem;
  }

  /* List Items */
  li {
    margin: 0.25rem 0;
  }

  p:not(:first-child) {
    margin-top: 0.75rem;
  }

  p:not(:last-child) {
    margin-bottom: 0.75rem;
  }

  ul > li {
    margin-left: 1rem;
  }

  /* Table */
  table,
  th,
  td {
    border: 1px solid lightgray;
    padding: 0.25rem;
  }

  @media (prefers-color-scheme: dark) {
    table,
    th,
    td {
      border: 1px solid #f2f2f2;
    }
  }
}
src/styles/index.css ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Tailwind's base, component, and utility layers. */
@tailwind base;
@tailwind components;
@tailwind utilities;

@layer utilities {
  /* Thin custom scrollbar (WebKit-based browsers only). */
  .scrollbar-thin::-webkit-scrollbar {
    @apply w-2;
  }

  .scrollbar-thin::-webkit-scrollbar-track {
    @apply rounded-full bg-gray-100 dark:bg-gray-700;
  }

  .scrollbar-thin::-webkit-scrollbar-thumb {
    @apply rounded-full bg-gray-300 dark:bg-gray-600;
  }

  .scrollbar-thin::-webkit-scrollbar-thumb:hover {
    @apply bg-gray-500;
  }

  /* Stagger the typing-indicator dots in Chat.jsx. */
  .animation-delay-200 {
    animation-delay: 200ms;
  }
  .animation-delay-400 {
    animation-delay: 400ms;
  }

  /* Allow long unbroken strings (URLs, code) to wrap inside bubbles. */
  .overflow-wrap-anywhere {
    overflow-wrap: anywhere;
  }
}
src/worker.js ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ AutoTokenizer,
3
+ AutoModelForCausalLM,
4
+ TextStreamer,
5
+ InterruptableStoppingCriteria,
6
+ } from "@huggingface/transformers";
7
+
8
/**
 * This class uses the Singleton pattern to enable lazy-loading of the pipeline.
 * The tokenizer and model load promises are created once and cached on the
 * class, so concurrent callers share a single in-flight load.
 */
class TextGenerationPipeline {
  // Hugging Face model repository to load.
  static model_id = "onnx-community/gemma-3-270m-it-ONNX";

  /**
   * Return [tokenizer, model], starting the downloads on first call.
   * @param {?Function} progress_callback - forwarded to from_pretrained so
   *   download progress events can be relayed to the main thread.
   */
  static async getInstance(progress_callback = null) {
    // NOTE(review): the promises themselves are cached, so a rejected load
    // stays cached and cannot be retried without reloading the page — confirm
    // this is acceptable.
    this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, {
      progress_callback,
    });

    this.model ??= AutoModelForCausalLM.from_pretrained(this.model_id, {
      dtype: "q4", // Choose better quants like fp32
      device: "webgpu",
      progress_callback,
    });

    return Promise.all([this.tokenizer, this.model]);
  }
}
28
+
29
// Shared stopping criteria so an "interrupt" message can abort an
// in-flight generation from the message handler below.
const stopping_criteria = new InterruptableStoppingCriteria();

// KV attention cache carried between turns of the same conversation;
// cleared by the "reset" message so a new conversation starts fresh.
let past_key_values_cache = null;
32
/**
 * Generate text based on the input messages.
 *
 * Streams partial output to the main thread as {status: "update", output,
 * tps, numTokens} messages, then posts {status: "complete", output} with the
 * fully decoded sequences.
 *
 * @param {Array<{role: string, content: string}>} messages - chat history,
 *   already including the system prompt (built by the main thread).
 */
async function generate(messages) {
  // Retrieve the text-generation pipeline.
  const [tokenizer, model] = await TextGenerationPipeline.getInstance();

  const inputs = tokenizer.apply_chat_template(messages, {
    add_generation_prompt: true,
    return_dict: true,
  });

  // Throughput tracking: the timer starts at the first generated token.
  let startTime;
  let numTokens = 0;
  let tps;
  const token_callback_function = () => {
    startTime ??= performance.now();

    // Skip the first token so tps reflects steady-state decoding speed.
    if (numTokens++ > 0) {
      tps = (numTokens / (performance.now() - startTime)) * 1000;
    }
  };
  // Called with each decoded text chunk; relays it to the main thread.
  const callback_function = (output) => {
    self.postMessage({
      status: "update",
      output,
      tps,
      numTokens,
    });
  };

  const streamer = new TextStreamer(tokenizer, {
    skip_prompt: true,
    skip_special_tokens: true,
    callback_function,
    token_callback_function,
  });

  // Tell the main thread we are starting
  self.postMessage({ status: "start" });

  const { past_key_values, sequences } = await model.generate({
    ...inputs,
    // Reuse the KV cache from the previous turn of this conversation.
    past_key_values: past_key_values_cache,

    // Sampling
    // NOTE(review): temperature has no effect while do_sample is false
    // (greedy decoding) — confirm which behavior is intended.
    do_sample: false,
    temperature: 0.3,

    max_new_tokens: 512,
    streamer,
    stopping_criteria,
    return_dict_in_generate: true,
  });
  // Keep the cache so the next turn can continue from it.
  past_key_values_cache = past_key_values;

  const decoded = tokenizer.batch_decode(sequences, {
    skip_special_tokens: true,
  });

  // Send the output back to the main thread
  self.postMessage({
    status: "complete",
    output: decoded,
  });
}
98
+
99
/**
 * Feature-detect WebGPU by requesting an adapter. Any failure (including
 * `navigator.gpu` being undefined) is reported to the main thread as an
 * "error" status message; success posts nothing.
 */
async function check() {
  try {
    const adapter = await navigator.gpu.requestAdapter();
    if (adapter) {
      return;
    }
    throw new Error("WebGPU is not supported (no adapter found)");
  } catch (e) {
    self.postMessage({
      status: "error",
      data: e.toString(),
    });
  }
}
115
+
116
/**
 * Load the tokenizer and model (relaying download progress to the main
 * thread), then run a one-token warm-up generation so shaders are compiled
 * before the first real request. Posts {status: "ready"} when done.
 */
async function load() {
  self.postMessage({
    status: "loading",
    data: "Loading model...",
  });

  // Start (or reuse) the singleton pipeline; forward every progress event
  // straight to the main thread so the UI can render progress bars.
  const [tokenizer, model] = await TextGenerationPipeline.getInstance(
    (progress) => {
      self.postMessage(progress);
    }
  );

  self.postMessage({
    status: "loading",
    data: "Compiling shaders and warming up the model...",
  });

  // A dummy single-token generation forces shader compilation up front.
  const warmupInputs = tokenizer("a");
  await model.generate({ ...warmupInputs, max_new_tokens: 1 });
  self.postMessage({ status: "ready" });
}
142
+
143
// Listen for messages from the main thread and dispatch on the message type.
// Async work is explicitly .catch-ed: previously the generate()/load()
// promises were floating, so a rejection (e.g. an OOM or network failure)
// was silently dropped and the UI never received the "error" status it
// already knows how to display.
self.addEventListener("message", (e) => {
  const { type, data } = e.data;

  // Report a failed async task to the main thread.
  const reportError = (err) => {
    self.postMessage({ status: "error", data: err.toString() });
  };

  switch (type) {
    case "check":
      // check() reports its own failures; nothing to catch here.
      check();
      break;

    case "load":
      load().catch(reportError);
      break;

    case "generate":
      // Clear any previous interrupt before starting a new generation.
      stopping_criteria.reset();
      generate(data).catch(reportError);
      break;

    case "interrupt":
      stopping_criteria.interrupt();
      break;

    case "reset":
      // Drop the conversation's KV cache and any pending interrupt.
      past_key_values_cache = null;
      stopping_criteria.reset();
      break;
  }
});