Spaces:
Running
Running
Commit
·
0623576
1
Parent(s):
63378c2
adding screen as input
Browse files
- .gitignore +67 -0
- src/App.tsx +4 -2
- src/components/CaptioningView.tsx +3 -1
- src/components/WebcamCapture.tsx +3 -1
- src/components/WebcamPermissionDialog.tsx +74 -17
- src/components/WelcomeScreen.tsx +1 -1
.gitignore
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Logs
|
| 2 |
+
logs
|
| 3 |
+
*.log
|
| 4 |
+
npm-debug.log*
|
| 5 |
+
yarn-debug.log*
|
| 6 |
+
yarn-error.log*
|
| 7 |
+
pnpm-debug.log*
|
| 8 |
+
lerna-debug.log*
|
| 9 |
+
|
| 10 |
+
# Dependencies
|
| 11 |
+
node_modules
|
| 12 |
+
.pnp
|
| 13 |
+
.pnp.js
|
| 14 |
+
|
| 15 |
+
# Build outputs
|
| 16 |
+
dist
|
| 17 |
+
dist-ssr
|
| 18 |
+
*.local
|
| 19 |
+
|
| 20 |
+
# Editor directories and files
|
| 21 |
+
.vscode/*
|
| 22 |
+
!.vscode/extensions.json
|
| 23 |
+
.idea
|
| 24 |
+
.DS_Store
|
| 25 |
+
*.suo
|
| 26 |
+
*.ntvs*
|
| 27 |
+
*.njsproj
|
| 28 |
+
*.sln
|
| 29 |
+
*.sw?
|
| 30 |
+
|
| 31 |
+
# Environment variables
|
| 32 |
+
.env
|
| 33 |
+
.env.local
|
| 34 |
+
.env.development.local
|
| 35 |
+
.env.test.local
|
| 36 |
+
.env.production.local
|
| 37 |
+
|
| 38 |
+
# Testing
|
| 39 |
+
coverage
|
| 40 |
+
*.lcov
|
| 41 |
+
.nyc_output
|
| 42 |
+
|
| 43 |
+
# Temporary files
|
| 44 |
+
*.tmp
|
| 45 |
+
*.temp
|
| 46 |
+
.cache
|
| 47 |
+
|
| 48 |
+
# OS files
|
| 49 |
+
Thumbs.db
|
| 50 |
+
.DS_Store
|
| 51 |
+
|
| 52 |
+
# TypeScript
|
| 53 |
+
*.tsbuildinfo
|
| 54 |
+
|
| 55 |
+
# Optional npm cache directory
|
| 56 |
+
.npm
|
| 57 |
+
|
| 58 |
+
# Optional eslint cache
|
| 59 |
+
.eslintcache
|
| 60 |
+
|
| 61 |
+
# Optional stylelint cache
|
| 62 |
+
.stylelintcache
|
| 63 |
+
|
| 64 |
+
# Vite
|
| 65 |
+
.vite
|
| 66 |
+
vite.config.js.timestamp-*
|
| 67 |
+
vite.config.ts.timestamp-*
|
src/App.tsx
CHANGED
|
@@ -9,10 +9,12 @@ export default function App() {
|
|
| 9 |
const [appState, setAppState] = useState<AppState>("requesting-permission");
|
| 10 |
const [webcamStream, setWebcamStream] = useState<MediaStream | null>(null);
|
| 11 |
const [isVideoReady, setIsVideoReady] = useState(false);
|
|
|
|
| 12 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 13 |
|
| 14 |
-
const handlePermissionGranted = useCallback((stream: MediaStream) => {
|
| 15 |
setWebcamStream(stream);
|
|
|
|
| 16 |
setAppState("welcome");
|
| 17 |
}, []);
|
| 18 |
|
|
@@ -109,7 +111,7 @@ export default function App() {
|
|
| 109 |
<LoadingScreen onComplete={handleLoadingComplete} />
|
| 110 |
)}
|
| 111 |
|
| 112 |
-
{appState === "captioning" && <CaptioningView videoRef={videoRef} />}
|
| 113 |
</div>
|
| 114 |
);
|
| 115 |
}
|
|
|
|
| 9 |
const [appState, setAppState] = useState<AppState>("requesting-permission");
|
| 10 |
const [webcamStream, setWebcamStream] = useState<MediaStream | null>(null);
|
| 11 |
const [isVideoReady, setIsVideoReady] = useState(false);
|
| 12 |
+
const [sourceType, setSourceType] = useState<"camera" | "screen">("camera");
|
| 13 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 14 |
|
| 15 |
+
const handlePermissionGranted = useCallback((stream: MediaStream, source: "camera" | "screen") => {
|
| 16 |
setWebcamStream(stream);
|
| 17 |
+
setSourceType(source);
|
| 18 |
setAppState("welcome");
|
| 19 |
}, []);
|
| 20 |
|
|
|
|
| 111 |
<LoadingScreen onComplete={handleLoadingComplete} />
|
| 112 |
)}
|
| 113 |
|
| 114 |
+
{appState === "captioning" && <CaptioningView videoRef={videoRef} sourceType={sourceType} />}
|
| 115 |
</div>
|
| 116 |
);
|
| 117 |
}
|
src/components/CaptioningView.tsx
CHANGED
|
@@ -7,6 +7,7 @@ import { PROMPTS, TIMING } from "../constants";
|
|
| 7 |
|
| 8 |
interface CaptioningViewProps {
|
| 9 |
videoRef: React.RefObject<HTMLVideoElement | null>;
|
|
|
|
| 10 |
}
|
| 11 |
|
| 12 |
function useCaptioningLoop(
|
|
@@ -94,7 +95,7 @@ function useCaptioningLoop(
|
|
| 94 |
}, [isRunning, isLoaded, runInference, promptRef, videoRef]);
|
| 95 |
}
|
| 96 |
|
| 97 |
-
export default function CaptioningView({ videoRef }: CaptioningViewProps) {
|
| 98 |
const { imageSize, setImageSize } = useVLMContext();
|
| 99 |
const [caption, setCaption] = useState<string>("");
|
| 100 |
const [isLoopRunning, setIsLoopRunning] = useState<boolean>(true);
|
|
@@ -177,6 +178,7 @@ export default function CaptioningView({ videoRef }: CaptioningViewProps) {
|
|
| 177 |
error={error}
|
| 178 |
imageSize={imageSize}
|
| 179 |
onImageSizeChange={setImageSize}
|
|
|
|
| 180 |
/>
|
| 181 |
|
| 182 |
{/* Prompt Input - Bottom Left */}
|
|
|
|
| 7 |
|
| 8 |
interface CaptioningViewProps {
|
| 9 |
videoRef: React.RefObject<HTMLVideoElement | null>;
|
| 10 |
+
sourceType: "camera" | "screen";
|
| 11 |
}
|
| 12 |
|
| 13 |
function useCaptioningLoop(
|
|
|
|
| 95 |
}, [isRunning, isLoaded, runInference, promptRef, videoRef]);
|
| 96 |
}
|
| 97 |
|
| 98 |
+
export default function CaptioningView({ videoRef, sourceType }: CaptioningViewProps) {
|
| 99 |
const { imageSize, setImageSize } = useVLMContext();
|
| 100 |
const [caption, setCaption] = useState<string>("");
|
| 101 |
const [isLoopRunning, setIsLoopRunning] = useState<boolean>(true);
|
|
|
|
| 178 |
error={error}
|
| 179 |
imageSize={imageSize}
|
| 180 |
onImageSizeChange={setImageSize}
|
| 181 |
+
sourceType={sourceType}
|
| 182 |
/>
|
| 183 |
|
| 184 |
{/* Prompt Input - Bottom Left */}
|
src/components/WebcamCapture.tsx
CHANGED
|
@@ -7,6 +7,7 @@ interface WebcamCaptureProps {
|
|
| 7 |
error?: string | null;
|
| 8 |
imageSize?: number;
|
| 9 |
onImageSizeChange?: (size: number) => void;
|
|
|
|
| 10 |
}
|
| 11 |
|
| 12 |
export default function WebcamCapture({
|
|
@@ -15,6 +16,7 @@ export default function WebcamCapture({
|
|
| 15 |
error,
|
| 16 |
imageSize,
|
| 17 |
onImageSizeChange,
|
|
|
|
| 18 |
}: WebcamCaptureProps) {
|
| 19 |
const hasError = Boolean(error);
|
| 20 |
|
|
@@ -26,7 +28,7 @@ export default function WebcamCapture({
|
|
| 26 |
}
|
| 27 |
: isRunning
|
| 28 |
? {
|
| 29 |
-
text: "LIVE FEED",
|
| 30 |
color: "bg-[var(--mistral-orange)] animate-pulse",
|
| 31 |
border: "border-[var(--mistral-orange)]",
|
| 32 |
}
|
|
|
|
| 7 |
error?: string | null;
|
| 8 |
imageSize?: number;
|
| 9 |
onImageSizeChange?: (size: number) => void;
|
| 10 |
+
sourceType: "camera" | "screen";
|
| 11 |
}
|
| 12 |
|
| 13 |
export default function WebcamCapture({
|
|
|
|
| 16 |
error,
|
| 17 |
imageSize,
|
| 18 |
onImageSizeChange,
|
| 19 |
+
sourceType,
|
| 20 |
}: WebcamCaptureProps) {
|
| 21 |
const hasError = Boolean(error);
|
| 22 |
|
|
|
|
| 28 |
}
|
| 29 |
: isRunning
|
| 30 |
? {
|
| 31 |
+
text: sourceType === "screen" ? "SCREEN CAPTURE" : "LIVE FEED",
|
| 32 |
color: "bg-[var(--mistral-orange)] animate-pulse",
|
| 33 |
border: "border-[var(--mistral-orange)]",
|
| 34 |
}
|
src/components/WebcamPermissionDialog.tsx
CHANGED
|
@@ -17,13 +17,15 @@ const VIDEO_CONSTRAINTS = {
|
|
| 17 |
},
|
| 18 |
};
|
| 19 |
|
|
|
|
|
|
|
| 20 |
interface ErrorInfo {
|
| 21 |
type: (typeof ERROR_TYPES)[keyof typeof ERROR_TYPES];
|
| 22 |
message: string;
|
| 23 |
}
|
| 24 |
|
| 25 |
interface WebcamPermissionDialogProps {
|
| 26 |
-
onPermissionGranted: (stream: MediaStream) => void;
|
| 27 |
}
|
| 28 |
|
| 29 |
export default function WebcamPermissionDialog({
|
|
@@ -31,6 +33,7 @@ export default function WebcamPermissionDialog({
|
|
| 31 |
}: WebcamPermissionDialogProps) {
|
| 32 |
const [isRequesting, setIsRequesting] = useState(false);
|
| 33 |
const [error, setError] = useState<ErrorInfo | null>(null);
|
|
|
|
| 34 |
|
| 35 |
const [mounted, setMounted] = useState(false);
|
| 36 |
useEffect(() => setMounted(true), []);
|
|
@@ -91,31 +94,40 @@ export default function WebcamPermissionDialog({
|
|
| 91 |
};
|
| 92 |
};
|
| 93 |
|
| 94 |
-
const
|
| 95 |
setIsRequesting(true);
|
| 96 |
setError(null);
|
|
|
|
| 97 |
|
| 98 |
try {
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
}
|
| 102 |
|
| 103 |
-
|
| 104 |
-
await navigator.mediaDevices.getUserMedia(VIDEO_CONSTRAINTS);
|
| 105 |
-
onPermissionGranted(stream);
|
| 106 |
} catch (err) {
|
| 107 |
const errorInfo = getErrorInfo(err);
|
| 108 |
setError(errorInfo);
|
| 109 |
-
console.error(
|
| 110 |
} finally {
|
| 111 |
setIsRequesting(false);
|
| 112 |
}
|
| 113 |
}, [onPermissionGranted]);
|
| 114 |
|
| 115 |
-
useEffect(() => {
|
| 116 |
-
requestWebcamAccess();
|
| 117 |
-
}, [requestWebcamAccess]);
|
| 118 |
-
|
| 119 |
const troubleshootingData = useMemo(
|
| 120 |
() => ({
|
| 121 |
[ERROR_TYPES.HTTPS]: {
|
|
@@ -209,15 +221,15 @@ export default function WebcamPermissionDialog({
|
|
| 209 |
};
|
| 210 |
|
| 211 |
const getTitle = () => {
|
| 212 |
-
if (isRequesting) return "Initialize Camera";
|
| 213 |
if (error) return "Connection Failed";
|
| 214 |
-
return "
|
| 215 |
};
|
| 216 |
|
| 217 |
const getDescription = () => {
|
| 218 |
-
if (isRequesting) return "Requesting access to video
|
| 219 |
if (error) return error.message;
|
| 220 |
-
return "
|
| 221 |
};
|
| 222 |
|
| 223 |
return (
|
|
@@ -324,12 +336,57 @@ export default function WebcamPermissionDialog({
|
|
| 324 |
</p>
|
| 325 |
</div>
|
| 326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
{/* Error Actions */}
|
| 328 |
{error && (
|
| 329 |
<div className="animate-enter">
|
| 330 |
<div className="flex justify-center mb-6">
|
| 331 |
<Button
|
| 332 |
-
onClick={
|
| 333 |
disabled={isRequesting}
|
| 334 |
className="px-8 py-3 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)]"
|
| 335 |
>
|
|
|
|
| 17 |
},
|
| 18 |
};
|
| 19 |
|
| 20 |
+
type SourceType = "camera" | "screen";
|
| 21 |
+
|
| 22 |
interface ErrorInfo {
|
| 23 |
type: (typeof ERROR_TYPES)[keyof typeof ERROR_TYPES];
|
| 24 |
message: string;
|
| 25 |
}
|
| 26 |
|
| 27 |
interface WebcamPermissionDialogProps {
|
| 28 |
+
onPermissionGranted: (stream: MediaStream, sourceType: SourceType) => void;
|
| 29 |
}
|
| 30 |
|
| 31 |
export default function WebcamPermissionDialog({
|
|
|
|
| 33 |
}: WebcamPermissionDialogProps) {
|
| 34 |
const [isRequesting, setIsRequesting] = useState(false);
|
| 35 |
const [error, setError] = useState<ErrorInfo | null>(null);
|
| 36 |
+
const [selectedSource, setSelectedSource] = useState<SourceType | null>(null);
|
| 37 |
|
| 38 |
const [mounted, setMounted] = useState(false);
|
| 39 |
useEffect(() => setMounted(true), []);
|
|
|
|
| 94 |
};
|
| 95 |
};
|
| 96 |
|
| 97 |
+
const requestAccess = useCallback(async (sourceType: SourceType) => {
|
| 98 |
setIsRequesting(true);
|
| 99 |
setError(null);
|
| 100 |
+
setSelectedSource(sourceType);
|
| 101 |
|
| 102 |
try {
|
| 103 |
+
let stream: MediaStream;
|
| 104 |
+
|
| 105 |
+
if (sourceType === "camera") {
|
| 106 |
+
if (!navigator.mediaDevices?.getUserMedia) {
|
| 107 |
+
throw new Error("NOT_SUPPORTED");
|
| 108 |
+
}
|
| 109 |
+
stream = await navigator.mediaDevices.getUserMedia(VIDEO_CONSTRAINTS);
|
| 110 |
+
} else {
|
| 111 |
+
// Screen capture
|
| 112 |
+
if (!navigator.mediaDevices?.getDisplayMedia) {
|
| 113 |
+
throw new Error("NOT_SUPPORTED");
|
| 114 |
+
}
|
| 115 |
+
stream = await navigator.mediaDevices.getDisplayMedia({
|
| 116 |
+
video: true,
|
| 117 |
+
audio: false,
|
| 118 |
+
} as DisplayMediaStreamOptions);
|
| 119 |
}
|
| 120 |
|
| 121 |
+
onPermissionGranted(stream, sourceType);
|
|
|
|
|
|
|
| 122 |
} catch (err) {
|
| 123 |
const errorInfo = getErrorInfo(err);
|
| 124 |
setError(errorInfo);
|
| 125 |
+
console.error(`Error accessing ${sourceType}:`, err, errorInfo);
|
| 126 |
} finally {
|
| 127 |
setIsRequesting(false);
|
| 128 |
}
|
| 129 |
}, [onPermissionGranted]);
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
const troubleshootingData = useMemo(
|
| 132 |
() => ({
|
| 133 |
[ERROR_TYPES.HTTPS]: {
|
|
|
|
| 221 |
};
|
| 222 |
|
| 223 |
const getTitle = () => {
|
| 224 |
+
if (isRequesting) return selectedSource === "screen" ? "Initialize Screen Capture" : "Initialize Camera";
|
| 225 |
if (error) return "Connection Failed";
|
| 226 |
+
return "Select Video Source";
|
| 227 |
};
|
| 228 |
|
| 229 |
const getDescription = () => {
|
| 230 |
+
if (isRequesting) return "Requesting access to video source...";
|
| 231 |
if (error) return error.message;
|
| 232 |
+
return "Choose your video source for real-time visual inference.";
|
| 233 |
};
|
| 234 |
|
| 235 |
return (
|
|
|
|
| 336 |
</p>
|
| 337 |
</div>
|
| 338 |
|
| 339 |
+
{/* Source Selection Buttons */}
|
| 340 |
+
{!isRequesting && !error && (
|
| 341 |
+
<div className="flex flex-col gap-3">
|
| 342 |
+
<Button
|
| 343 |
+
onClick={() => requestAccess("camera")}
|
| 344 |
+
className="w-full px-6 py-4 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)] flex items-center justify-center gap-3"
|
| 345 |
+
>
|
| 346 |
+
<svg
|
| 347 |
+
className="w-6 h-6"
|
| 348 |
+
fill="none"
|
| 349 |
+
viewBox="0 0 24 24"
|
| 350 |
+
stroke="currentColor"
|
| 351 |
+
strokeWidth={2}
|
| 352 |
+
>
|
| 353 |
+
<path
|
| 354 |
+
strokeLinecap="round"
|
| 355 |
+
strokeLinejoin="round"
|
| 356 |
+
d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z"
|
| 357 |
+
/>
|
| 358 |
+
</svg>
|
| 359 |
+
Use Camera
|
| 360 |
+
</Button>
|
| 361 |
+
|
| 362 |
+
<Button
|
| 363 |
+
onClick={() => requestAccess("screen")}
|
| 364 |
+
className="w-full px-6 py-4 bg-gray-700 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-gray-800 flex items-center justify-center gap-3"
|
| 365 |
+
>
|
| 366 |
+
<svg
|
| 367 |
+
className="w-6 h-6"
|
| 368 |
+
fill="none"
|
| 369 |
+
viewBox="0 0 24 24"
|
| 370 |
+
stroke="currentColor"
|
| 371 |
+
strokeWidth={2}
|
| 372 |
+
>
|
| 373 |
+
<path
|
| 374 |
+
strokeLinecap="round"
|
| 375 |
+
strokeLinejoin="round"
|
| 376 |
+
d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z"
|
| 377 |
+
/>
|
| 378 |
+
</svg>
|
| 379 |
+
Capture Screen
|
| 380 |
+
</Button>
|
| 381 |
+
</div>
|
| 382 |
+
)}
|
| 383 |
+
|
| 384 |
{/* Error Actions */}
|
| 385 |
{error && (
|
| 386 |
<div className="animate-enter">
|
| 387 |
<div className="flex justify-center mb-6">
|
| 388 |
<Button
|
| 389 |
+
onClick={() => requestAccess(selectedSource || "camera")}
|
| 390 |
disabled={isRequesting}
|
| 391 |
className="px-8 py-3 text-white shadow-lg hover:shadow-xl transition-all font-semibold tracking-wide hover:bg-[var(--mistral-orange-dark)]"
|
| 392 |
>
|
src/components/WelcomeScreen.tsx
CHANGED
|
@@ -131,7 +131,7 @@ export default function WelcomeScreen({ onStart }: WelcomeScreenProps) {
|
|
| 131 |
Private & Local
|
| 132 |
</h4>
|
| 133 |
<p className="text-gray-600 leading-relaxed">
|
| 134 |
-
Your video
|
| 135 |
server, powered by
|
| 136 |
<a href="https://github.com/huggingface/transformers.js">
|
| 137 |
<span className="font-medium underline">
|
|
|
|
| 131 |
Private & Local
|
| 132 |
</h4>
|
| 133 |
<p className="text-gray-600 leading-relaxed">
|
| 134 |
+
Your video source (camera or screen) is processed locally and never sent to a
|
| 135 |
server, powered by
|
| 136 |
<a href="https://github.com/huggingface/transformers.js">
|
| 137 |
<span className="font-medium underline">
|