oliveregger committed on
Commit
0623576
·
1 Parent(s): 63378c2

adding screen as input

Browse files
.gitignore ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ # Dependencies
11
+ node_modules
12
+ .pnp
13
+ .pnp.js
14
+
15
+ # Build outputs
16
+ dist
17
+ dist-ssr
18
+ *.local
19
+
20
+ # Editor directories and files
21
+ .vscode/*
22
+ !.vscode/extensions.json
23
+ .idea
24
+ .DS_Store
25
+ *.suo
26
+ *.ntvs*
27
+ *.njsproj
28
+ *.sln
29
+ *.sw?
30
+
31
+ # Environment variables
32
+ .env
33
+ .env.local
34
+ .env.development.local
35
+ .env.test.local
36
+ .env.production.local
37
+
38
+ # Testing
39
+ coverage
40
+ *.lcov
41
+ .nyc_output
42
+
43
+ # Temporary files
44
+ *.tmp
45
+ *.temp
46
+ .cache
47
+
48
+ # OS files
49
+ Thumbs.db
50
+ .DS_Store
51
+
52
+ # TypeScript
53
+ *.tsbuildinfo
54
+
55
+ # Optional npm cache directory
56
+ .npm
57
+
58
+ # Optional eslint cache
59
+ .eslintcache
60
+
61
+ # Optional stylelint cache
62
+ .stylelintcache
63
+
64
+ # Vite
65
+ .vite
66
+ vite.config.js.timestamp-*
67
+ vite.config.ts.timestamp-*
src/App.tsx CHANGED
@@ -9,10 +9,12 @@ export default function App() {
9
  const [appState, setAppState] = useState<AppState>("requesting-permission");
10
  const [webcamStream, setWebcamStream] = useState<MediaStream | null>(null);
11
  const [isVideoReady, setIsVideoReady] = useState(false);
 
12
  const videoRef = useRef<HTMLVideoElement | null>(null);
13
 
14
- const handlePermissionGranted = useCallback((stream: MediaStream) => {
15
  setWebcamStream(stream);
 
16
  setAppState("welcome");
17
  }, []);
18
 
@@ -109,7 +111,7 @@ export default function App() {
109
  <LoadingScreen onComplete={handleLoadingComplete} />
110
  )}
111
 
112
- {appState === "captioning" && <CaptioningView videoRef={videoRef} />}
113
  </div>
114
  );
115
  }
 
9
  const [appState, setAppState] = useState<AppState>("requesting-permission");
10
  const [webcamStream, setWebcamStream] = useState<MediaStream | null>(null);
11
  const [isVideoReady, setIsVideoReady] = useState(false);
12
+ const [sourceType, setSourceType] = useState<"camera" | "screen">("camera");
13
  const videoRef = useRef<HTMLVideoElement | null>(null);
14
 
15
+ const handlePermissionGranted = useCallback((stream: MediaStream, source: "camera" | "screen") => {
16
  setWebcamStream(stream);
17
+ setSourceType(source);
18
  setAppState("welcome");
19
  }, []);
20
 
 
111
  <LoadingScreen onComplete={handleLoadingComplete} />
112
  )}
113
 
114
+ {appState === "captioning" && <CaptioningView videoRef={videoRef} sourceType={sourceType} />}
115
  </div>
116
  );
117
  }
src/components/CaptioningView.tsx CHANGED
@@ -7,6 +7,7 @@ import { PROMPTS, TIMING } from "../constants";
7
 
8
  interface CaptioningViewProps {
9
  videoRef: React.RefObject<HTMLVideoElement | null>;
 
10
  }
11
 
12
  function useCaptioningLoop(
@@ -94,7 +95,7 @@ function useCaptioningLoop(
94
  }, [isRunning, isLoaded, runInference, promptRef, videoRef]);
95
  }
96
 
97
- export default function CaptioningView({ videoRef }: CaptioningViewProps) {
98
  const { imageSize, setImageSize } = useVLMContext();
99
  const [caption, setCaption] = useState<string>("");
100
  const [isLoopRunning, setIsLoopRunning] = useState<boolean>(true);
@@ -177,6 +178,7 @@ export default function CaptioningView({ videoRef }: CaptioningViewProps) {
177
  error={error}
178
  imageSize={imageSize}
179
  onImageSizeChange={setImageSize}
 
180
  />
181
 
182
  {/* Prompt Input - Bottom Left */}
 
7
 
8
  interface CaptioningViewProps {
9
  videoRef: React.RefObject<HTMLVideoElement | null>;
10
+ sourceType: "camera" | "screen";
11
  }
12
 
13
  function useCaptioningLoop(
 
95
  }, [isRunning, isLoaded, runInference, promptRef, videoRef]);
96
  }
97
 
98
+ export default function CaptioningView({ videoRef, sourceType }: CaptioningViewProps) {
99
  const { imageSize, setImageSize } = useVLMContext();
100
  const [caption, setCaption] = useState<string>("");
101
  const [isLoopRunning, setIsLoopRunning] = useState<boolean>(true);
 
178
  error={error}
179
  imageSize={imageSize}
180
  onImageSizeChange={setImageSize}
181
+ sourceType={sourceType}
182
  />
183
 
184
  {/* Prompt Input - Bottom Left */}
src/components/WebcamCapture.tsx CHANGED
@@ -7,6 +7,7 @@ interface WebcamCaptureProps {
7
  error?: string | null;
8
  imageSize?: number;
9
  onImageSizeChange?: (size: number) => void;
 
10
  }
11
 
12
  export default function WebcamCapture({
@@ -15,6 +16,7 @@ export default function WebcamCapture({
15
  error,
16
  imageSize,
17
  onImageSizeChange,
 
18
  }: WebcamCaptureProps) {
19
  const hasError = Boolean(error);
20
 
@@ -26,7 +28,7 @@ export default function WebcamCapture({
26
  }
27
  : isRunning
28
  ? {
29
- text: "LIVE FEED",
30
  color: "bg-[var(--mistral-orange)] animate-pulse",
31
  border: "border-[var(--mistral-orange)]",
32
  }
 
7
  error?: string | null;
8
  imageSize?: number;
9
  onImageSizeChange?: (size: number) => void;
10
+ sourceType: "camera" | "screen";
11
  }
12
 
13
  export default function WebcamCapture({
 
16
  error,
17
  imageSize,
18
  onImageSizeChange,
19
+ sourceType,
20
  }: WebcamCaptureProps) {
21
  const hasError = Boolean(error);
22
 
 
28
  }
29
  : isRunning
30
  ? {
31
+ text: sourceType === "screen" ? "SCREEN CAPTURE" : "LIVE FEED",
32
  color: "bg-[var(--mistral-orange)] animate-pulse",
33
  border: "border-[var(--mistral-orange)]",
34
  }
src/components/WebcamPermissionDialog.tsx CHANGED
@@ -17,13 +17,15 @@ const VIDEO_CONSTRAINTS = {
17
  },
18
  };
19
 
 
 
20
  interface ErrorInfo {
21
  type: (typeof ERROR_TYPES)[keyof typeof ERROR_TYPES];
22
  message: string;
23
  }
24
 
25
  interface WebcamPermissionDialogProps {
26
- onPermissionGranted: (stream: MediaStream) => void;
27
  }
28
 
29
  export default function WebcamPermissionDialog({
@@ -31,6 +33,7 @@ export default function WebcamPermissionDialog({
31
  }: WebcamPermissionDialogProps) {
32
  const [isRequesting, setIsRequesting] = useState(false);
33
  const [error, setError] = useState<ErrorInfo | null>(null);
 
34
 
35
  const [mounted, setMounted] = useState(false);
36
  useEffect(() => setMounted(true), []);
@@ -91,31 +94,40 @@ export default function WebcamPermissionDialog({
91
  };
92
  };
93
 
94
- const requestWebcamAccess = useCallback(async () => {
95
  setIsRequesting(true);
96
  setError(null);
 
97
 
98
  try {
99
- if (!navigator.mediaDevices?.getUserMedia) {
100
- throw new Error("NOT_SUPPORTED");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  }
102
 
103
- const stream =
104
- await navigator.mediaDevices.getUserMedia(VIDEO_CONSTRAINTS);
105
- onPermissionGranted(stream);
106
  } catch (err) {
107
  const errorInfo = getErrorInfo(err);
108
  setError(errorInfo);
109
- console.error("Error accessing webcam:", err, errorInfo);
110
  } finally {
111
  setIsRequesting(false);
112
  }
113
  }, [onPermissionGranted]);
114
 
115
- useEffect(() => {
116
- requestWebcamAccess();
117
- }, [requestWebcamAccess]);
118
-
119
  const troubleshootingData = useMemo(
120
  () => ({
121
  [ERROR_TYPES.HTTPS]: {
@@ -209,15 +221,15 @@ export default function WebcamPermissionDialog({
209
  };
210
 
211
  const getTitle = () => {
212
- if (isRequesting) return "Initialize Camera";
213
  if (error) return "Connection Failed";
214
- return "Permission Required";
215
  };
216
 
217
  const getDescription = () => {
218
- if (isRequesting) return "Requesting access to video input device...";
219
  if (error) return error.message;
220
- return "Ministral WebGPU requires local camera access for real-time inference.";
221
  };
222
 
223
  return (
@@ -324,12 +336,57 @@ export default function WebcamPermissionDialog({
324
  </p>
325
  </div>
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  {/* Error Actions */}
328
  {error && (
329
  <div className="animate-enter">
330
  <div className="flex justify-center mb-6">
331
  <Button
332
- onClick={requestWebcamAccess}
333
  disabled={isRequesting}
334
  className="px-8 py-3 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)]"
335
  >
 
17
  },
18
  };
19
 
20
+ type SourceType = "camera" | "screen";
21
+
22
  interface ErrorInfo {
23
  type: (typeof ERROR_TYPES)[keyof typeof ERROR_TYPES];
24
  message: string;
25
  }
26
 
27
  interface WebcamPermissionDialogProps {
28
+ onPermissionGranted: (stream: MediaStream, sourceType: SourceType) => void;
29
  }
30
 
31
  export default function WebcamPermissionDialog({
 
33
  }: WebcamPermissionDialogProps) {
34
  const [isRequesting, setIsRequesting] = useState(false);
35
  const [error, setError] = useState<ErrorInfo | null>(null);
36
+ const [selectedSource, setSelectedSource] = useState<SourceType | null>(null);
37
 
38
  const [mounted, setMounted] = useState(false);
39
  useEffect(() => setMounted(true), []);
 
94
  };
95
  };
96
 
97
+ const requestAccess = useCallback(async (sourceType: SourceType) => {
98
  setIsRequesting(true);
99
  setError(null);
100
+ setSelectedSource(sourceType);
101
 
102
  try {
103
+ let stream: MediaStream;
104
+
105
+ if (sourceType === "camera") {
106
+ if (!navigator.mediaDevices?.getUserMedia) {
107
+ throw new Error("NOT_SUPPORTED");
108
+ }
109
+ stream = await navigator.mediaDevices.getUserMedia(VIDEO_CONSTRAINTS);
110
+ } else {
111
+ // Screen capture
112
+ if (!navigator.mediaDevices?.getDisplayMedia) {
113
+ throw new Error("NOT_SUPPORTED");
114
+ }
115
+ stream = await navigator.mediaDevices.getDisplayMedia({
116
+ video: true,
117
+ audio: false,
118
+ } as DisplayMediaStreamOptions);
119
  }
120
 
121
+ onPermissionGranted(stream, sourceType);
 
 
122
  } catch (err) {
123
  const errorInfo = getErrorInfo(err);
124
  setError(errorInfo);
125
+ console.error(`Error accessing ${sourceType}:`, err, errorInfo);
126
  } finally {
127
  setIsRequesting(false);
128
  }
129
  }, [onPermissionGranted]);
130
 
 
 
 
 
131
  const troubleshootingData = useMemo(
132
  () => ({
133
  [ERROR_TYPES.HTTPS]: {
 
221
  };
222
 
223
  const getTitle = () => {
224
+ if (isRequesting) return selectedSource === "screen" ? "Initialize Screen Capture" : "Initialize Camera";
225
  if (error) return "Connection Failed";
226
+ return "Select Video Source";
227
  };
228
 
229
  const getDescription = () => {
230
+ if (isRequesting) return "Requesting access to video source...";
231
  if (error) return error.message;
232
+ return "Choose your video source for real-time visual inference.";
233
  };
234
 
235
  return (
 
336
  </p>
337
  </div>
338
 
339
+ {/* Source Selection Buttons */}
340
+ {!isRequesting && !error && (
341
+ <div className="flex flex-col gap-3">
342
+ <Button
343
+ onClick={() => requestAccess("camera")}
344
+ className="w-full px-6 py-4 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)] flex items-center justify-center gap-3"
345
+ >
346
+ <svg
347
+ className="w-6 h-6"
348
+ fill="none"
349
+ viewBox="0 0 24 24"
350
+ stroke="currentColor"
351
+ strokeWidth={2}
352
+ >
353
+ <path
354
+ strokeLinecap="round"
355
+ strokeLinejoin="round"
356
+ d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z"
357
+ />
358
+ </svg>
359
+ Use Camera
360
+ </Button>
361
+
362
+ <Button
363
+ onClick={() => requestAccess("screen")}
364
+ className="w-full px-6 py-4 bg-gray-700 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-gray-800 flex items-center justify-center gap-3"
365
+ >
366
+ <svg
367
+ className="w-6 h-6"
368
+ fill="none"
369
+ viewBox="0 0 24 24"
370
+ stroke="currentColor"
371
+ strokeWidth={2}
372
+ >
373
+ <path
374
+ strokeLinecap="round"
375
+ strokeLinejoin="round"
376
+ d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z"
377
+ />
378
+ </svg>
379
+ Capture Screen
380
+ </Button>
381
+ </div>
382
+ )}
383
+
384
  {/* Error Actions */}
385
  {error && (
386
  <div className="animate-enter">
387
  <div className="flex justify-center mb-6">
388
  <Button
389
+ onClick={() => requestAccess(selectedSource || "camera")}
390
  disabled={isRequesting}
391
  className="px-8 py-3 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)]"
392
  >
src/components/WelcomeScreen.tsx CHANGED
@@ -131,7 +131,7 @@ export default function WelcomeScreen({ onStart }: WelcomeScreenProps) {
131
  Private & Local
132
  </h4>
133
  <p className="text-gray-600 leading-relaxed">
134
- Your video feed is processed locally and never sent to a
135
  server, powered by
136
  <a href="https://github.com/huggingface/transformers.js">
137
  <span className="font-medium underline">
 
131
  Private & Local
132
  </h4>
133
  <p className="text-gray-600 leading-relaxed">
134
+ Your video source (camera or screen) is processed locally and never sent to a
135
  server, powered by
136
  <a href="https://github.com/huggingface/transformers.js">
137
  <span className="font-medium underline">