import gradio as gr
import os
import json
import numpy as np
import cv2
import base64
import requests
import time
from typing import List, Tuple
from gradio_client.utils import handle_file
from pathlib import Path

# Backend Space URL - replace with your actual backend space URL
BACKEND_SPACE_URL = "Yuxihenry/SpatialTrackerV2_Backend"  # Replace with actual backend space URL
hf_token = os.getenv("HF_TOKEN")  # Read from the environment; do not hard-code your token here

# Debug information
print("🔧 Environment Debug Info:")
print(f"  - Backend URL: {BACKEND_SPACE_URL}")
print(f"  - HF Token available: {'Yes' if hf_token else 'No'}")
print(f"  - HF Token length: {len(hf_token) if hf_token else 0}")

# Flags to track whether the backend is available
BACKEND_AVAILABLE = False
backend_client = None
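
# Calling convention used throughout this file: the backend Space exposes a single
# "/unified_api" endpoint that multiplexes several operations. Every predict() call
# below passes the same positional signature:
#   (function_type, video, original_image_state, selected_points, point_type,
#    point_x, point_y, grid_size, vo_points, fps)
# with function_type one of "upload_video", "select_point", "reset_points", or
# "run_tracker"; arguments a given operation does not need are filled with placeholders.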

def check_user_permissions():
    """Check if user has necessary permissions"""
    print("🔐 Checking user permissions...")

    if not hf_token:
        print("❌ No HF Token found")
        print("🔧 To get a token:")
        print("   1. Go to https://huggingface.co/settings/tokens")
        print("   2. Create a new token with 'read' permissions")
        print("   3. Set it as environment variable: export HF_TOKEN='your_token'")
        return False

    # Try to access user info
    try:
        headers = {'Authorization': f'Bearer {hf_token}'}
        response = requests.get('https://huggingface.co/api/whoami', headers=headers, timeout=5)

        if response.status_code == 200:
            user_info = response.json()
            username = user_info.get('name', 'Unknown')
            print(f"✅ Authenticated as: {username}")

            # Check if user has access to the specific space
            space_url = f"https://huggingface.co/api/spaces/{BACKEND_SPACE_URL}"
            space_response = requests.get(space_url, headers=headers, timeout=5)

            if space_response.status_code == 200:
                print("✅ You have access to the backend Space")
                return True
            elif space_response.status_code == 401:
                print("❌ You don't have access to the backend Space")
                print("🔧 Solutions:")
                print("   1. Contact the Space owner to add you as a collaborator")
                print("   2. Ask the owner to make the Space public")
                return False
            elif space_response.status_code == 404:
                print("❌ Backend Space not found")
                print("🔧 Please check if the Space URL is correct")
                return False
            else:
                print(f"⚠️ Unexpected response checking Space access: {space_response.status_code}")
                return False
        else:
            print(f"❌ Token validation failed: {response.status_code}")
            print("🔧 Your token might be invalid or expired")
            return False
    except Exception as e:
        print(f"❌ Error checking permissions: {e}")
        return False

def check_backend_space_status():
    """Check if backend space is running via HTTP request"""
    try:
        backend_url = f"https://huggingface.co/spaces/{BACKEND_SPACE_URL}"
        print(f"🔍 Checking backend space status: {backend_url}")

        # Prepare headers with authentication if token is available
        headers = {}
        if hf_token:
            headers['Authorization'] = f'Bearer {hf_token}'
            print("🔐 Using HF Token for authentication")

        # Try to access the space page
        response = requests.get(backend_url, headers=headers, timeout=10)

        if response.status_code == 200:
            print("✅ Backend space page is accessible")

            # Check if space is running (look for common indicators)
            page_content = response.text.lower()
            if "runtime error" in page_content:
                print("❌ Backend space has a runtime error")
                return False
            elif "building" in page_content:
                print("🔄 Backend space is building...")
                return False
            elif "sleeping" in page_content:
                print("😴 Backend space is sleeping")
                return False
            else:
                print("✅ Backend space appears to be running")
                return True
        elif response.status_code == 401:
            print("❌ Authentication failed (HTTP 401)")
            print("🔧 This means:")
            print("   - The backend Space is private")
            print("   - Your HF Token doesn't have access to this Space")
            print("   - You need to be added as a collaborator to the Space")
            print("   - Or the Space owner needs to make it public")
            return False
        elif response.status_code == 404:
            print("❌ Backend space not found (HTTP 404)")
            print("🔧 Please check if the Space URL is correct:")
            print(f"   Current URL: {BACKEND_SPACE_URL}")
            return False
        else:
            print(f"❌ Backend space not accessible (HTTP {response.status_code})")
            print(f"🔧 Response: {response.text[:200]}...")
            return False
    except requests.RequestException as e:
        print(f"❌ Failed to check backend space status: {e}")
        return False
    except Exception as e:
        print(f"❌ Unexpected error checking backend: {e}")
        return False

def initialize_backend():
    """Initialize backend connection using gradio_client"""
    global backend_client, BACKEND_AVAILABLE
    try:
        from gradio_client import Client

        # Connect to HF Space
        if hf_token:
            backend_client = Client(BACKEND_SPACE_URL, hf_token=hf_token)
        else:
            backend_client = Client(BACKEND_SPACE_URL)

        # Test the connection
        backend_client.view_api()
        BACKEND_AVAILABLE = True
        return True
    except Exception as e:
        print(f"❌ Backend connection failed: {e}")
        BACKEND_AVAILABLE = False
        return False

def numpy_to_base64(arr):
    """Convert numpy array to base64 string"""
    return base64.b64encode(arr.tobytes()).decode('utf-8')

def base64_to_numpy(b64_str, shape, dtype):
    """Convert base64 string back to numpy array"""
    return np.frombuffer(base64.b64decode(b64_str), dtype=dtype).reshape(shape)
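
# Illustrative round-trip (not executed here): the base64 payload carries only raw
# bytes, so the caller must also pass the array's shape and dtype back in, exactly
# as the state dict built in handle_video_upload does.
#
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   payload = numpy_to_base64(frame)
#   restored = base64_to_numpy(payload, frame.shape, str(frame.dtype))
#   assert np.array_equal(frame, restored)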

def base64_to_image(b64_str):
    """Convert base64 string to numpy image array"""
    if not b64_str:
        return None
    try:
        # Decode base64 to bytes
        img_bytes = base64.b64decode(b64_str)
        # Convert bytes to numpy array
        nparr = np.frombuffer(img_bytes, np.uint8)
        # Decode image
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode returns None when the buffer is not a valid image
            return None
        # Convert BGR to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img
    except Exception as e:
        print(f"Error converting base64 to image: {e}")
        return None

def get_video_name(video_path):
    """Extract video name without extension"""
    return os.path.splitext(os.path.basename(video_path))[0]

def extract_first_frame(video_path):
    """Extract first frame from video file"""
    try:
        cap = cv2.VideoCapture(video_path)
        ret, frame = cap.read()
        cap.release()
        if ret:
            # Convert BGR to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            return frame_rgb
        else:
            return None
    except Exception as e:
        print(f"Error extracting first frame: {e}")
        return None

def handle_video_upload(video):
    """Handle video upload and extract first frame"""
    if video is None:
        return (None, None, [],
                gr.update(value=50),
                gr.update(value=756),
                gr.update(value=3))

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print("🔧 Calling backend API for video upload...")
                # Call the unified API with upload_video function type - fix: use handle_file wrapper
                result = backend_client.predict(
                    "upload_video",      # function_type
                    handle_file(video),  # video file - wrapped with handle_file
                    "",                  # original_image_state (not used for upload)
                    [],                  # selected_points (not used for upload)
                    "positive_point",    # point_type (not used for upload)
                    0,                   # point_x (not used for upload)
                    0,                   # point_y (not used for upload)
                    50,                  # grid_size (not used for upload)
                    756,                 # vo_points (not used for upload)
                    3,                   # fps (not used for upload)
                    api_name="/unified_api"
                )

                print("✅ Backend video upload API call successful!")
                print(f"🔧 Result type: {type(result)}")
                print(f"🔧 Result: {result}")

                # Parse the result - expect a dict with success status
                if isinstance(result, dict) and result.get("success"):
                    # Extract data from backend response
                    original_image_state = result.get("original_image_state", "")
                    display_image = result.get("display_image", None)
                    selected_points = result.get("selected_points", [])

                    # Fix: Convert display_image from list back to numpy array if needed
                    if isinstance(display_image, list):
                        display_image = np.array(display_image, dtype=np.uint8)
                        print(f"🔧 Converted display_image from list to numpy array: {display_image.shape}")

                    # Get video settings based on video name
                    video_name = get_video_name(video)
                    print(f"🎬 Video path: '{video}' -> Video name: '{video_name}'")
                    grid_size_val, vo_points_val, fps_val = get_video_settings(video_name)
                    print(f"🎬 Video settings for '{video_name}': grid_size={grid_size_val}, vo_points={vo_points_val}, fps={fps_val}")

                    return (original_image_state, display_image, selected_points,
                            gr.update(value=grid_size_val),
                            gr.update(value=vo_points_val),
                            gr.update(value=fps_val))
                else:
                    print("Backend processing failed, using local fallback")
                    # Fallback to local processing
            except Exception as e:
                print(f"Backend API call failed: {e}")
                # Fallback to local processing

        # Fallback: local processing
        print("Using local video processing...")
        display_image = extract_first_frame(video)

        if display_image is not None:
            # Create a state format compatible with backend
            import tempfile
            import shutil

            # Create a temporary directory for this session
            session_id = str(int(time.time() * 1000))  # Use timestamp as session ID
            temp_dir = os.path.join("temp_frontend", f"session_{session_id}")
            os.makedirs(temp_dir, exist_ok=True)

            # Copy video to temp directory with standardized name
            video_name = get_video_name(video)
            temp_video_path = os.path.join(temp_dir, f"{video_name}.mp4")
            shutil.copy(video, temp_video_path)

            # Create state format compatible with backend
            frame_data = {
                'data': numpy_to_base64(display_image),
                'shape': display_image.shape,
                'dtype': str(display_image.dtype),
                'temp_dir': temp_dir,
                'video_name': video_name,
                'video_path': temp_video_path  # Keep for backward compatibility
            }
            original_image_state = json.dumps(frame_data)
        else:
            # Fallback to simple state if frame extraction fails
            original_image_state = json.dumps({
                "video_path": video,
                "frame": "local_processing_failed"
            })

        # Get video settings
        video_name = get_video_name(video)
        print(f"🎬 Local fallback - Video path: '{video}' -> Video name: '{video_name}'")
        grid_size_val, vo_points_val, fps_val = get_video_settings(video_name)
        print(f"🎬 Local fallback - Video settings for '{video_name}': grid_size={grid_size_val}, vo_points={vo_points_val}, fps={fps_val}")

        return (original_image_state, display_image, [],
                gr.update(value=grid_size_val),
                gr.update(value=vo_points_val),
                gr.update(value=fps_val))
    except Exception as e:
        print(f"Error in handle_video_upload: {e}")
        return (None, None, [],
                gr.update(value=50),
                gr.update(value=756),
                gr.update(value=3))

def select_point(original_img: str, sel_pix: list, point_type: str, evt: gr.SelectData):
    """Handle point selection for SAM"""
    if original_img is None:
        return None, []

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print(f"🔧 Calling backend select point API: x={evt.index[0]}, y={evt.index[1]}, type={point_type}")
                # Call the unified API with select_point function type
                result = backend_client.predict(
                    "select_point",    # function_type
                    None,              # video file (not used for select_point)
                    original_img,      # original_image_state
                    sel_pix,           # selected_points
                    point_type,        # point_type
                    evt.index[0],      # point_x
                    evt.index[1],      # point_y
                    50,                # grid_size (not used for select_point)
                    756,               # vo_points (not used for select_point)
                    3,                 # fps (not used for select_point)
                    api_name="/unified_api"
                )

                print("✅ Backend select point API call successful!")
                print(f"🔧 Result type: {type(result)}")
                print(f"🔧 Result: {result}")

                # Parse the result - expect a dict with success status
                if isinstance(result, dict) and result.get("success"):
                    display_image = result.get("display_image", None)
                    new_sel_pix = result.get("selected_points", sel_pix)

                    # Fix: Convert display_image from list back to numpy array if needed
                    if isinstance(display_image, list):
                        display_image = np.array(display_image, dtype=np.uint8)
                        print(f"🔧 Converted display_image from list to numpy array: {display_image.shape}")

                    return display_image, new_sel_pix
                else:
                    print("Backend processing failed, using local fallback")
                    # Fallback to local processing
            except Exception as e:
                print(f"Backend API call failed: {e}")
                # Check for specific gradio_client errors
                if "AppError" in str(type(e)):
                    print("🔧 Backend Space has internal errors (AppError)")
                    print("🔧 The backend Space code has bugs or configuration issues")
                    print("🔧 Contact the Space owner to fix the backend implementation")
                elif "Could not fetch config" in str(e):
                    print("🔧 Config fetch failed - possible Gradio version mismatch")
                    print("🔧 Frontend and backend may be using incompatible Gradio versions")
                elif "timeout" in str(e).lower():
                    print("🔧 Backend request timed out - Space might be overloaded")
                else:
                    print(f"🔧 Unexpected error type: {type(e).__name__}")
                    print("🔄 Showing error message instead of visualization...")
                # Fallback to local processing

        # Fallback: local processing with improved visualization
        print("Using local point selection with enhanced visualization...")

        # Parse original image state
        try:
            state_data = json.loads(original_img)
            video_path = state_data.get("video_path")
        except:
            video_path = None

        if video_path:
            # Re-extract frame and add point with mask visualization
            display_image = extract_first_frame(video_path)
            if display_image is not None:
                # Add point to the image with enhanced visualization
                x, y = evt.index[0], evt.index[1]
                color = (0, 255, 0) if point_type == 'positive_point' else (255, 0, 0)

                # Draw a larger, more visible point
                cv2.circle(display_image, (x, y), 8, color, -1)
                cv2.circle(display_image, (x, y), 12, (255, 255, 255), 2)

                # Add point to selected points list - fix logic to match local version
                new_sel_pix = sel_pix.copy() if sel_pix else []
                new_sel_pix.append([x, y, point_type])

                return display_image, new_sel_pix

        return None, []
    except Exception as e:
        print(f"Error in select_point: {e}")
        return None, []

def reset_points(original_img: str, sel_pix):
    """Reset points and restore original image"""
    if original_img is None:
        return None, []

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print("🔧 Calling backend reset points API...")
                # Call the unified API with reset_points function type
                result = backend_client.predict(
                    "reset_points",    # function_type
                    None,              # video file (not used for reset_points)
                    original_img,      # original_image_state
                    sel_pix,           # selected_points
                    "positive_point",  # point_type (not used for reset_points)
                    0,                 # point_x (not used for reset_points)
                    0,                 # point_y (not used for reset_points)
                    50,                # grid_size (not used for reset_points)
                    756,               # vo_points (not used for reset_points)
                    3,                 # fps (not used for reset_points)
                    api_name="/unified_api"
                )

                print("✅ Backend reset points API call successful!")
                print(f"🔧 Result: {result}")

                # Parse the result
                if isinstance(result, dict) and result.get("success"):
                    display_image = result.get("display_image", None)
                    new_sel_pix = result.get("selected_points", [])

                    # Fix: Convert display_image from list back to numpy array if needed
                    if isinstance(display_image, list):
                        display_image = np.array(display_image, dtype=np.uint8)
                        print(f"🔧 Converted display_image from list to numpy array: {display_image.shape}")

                    return display_image, new_sel_pix
                else:
                    print("Backend processing failed, using local fallback")
                    # Fallback to local processing
            except Exception as e:
                print(f"Backend API call failed: {e}")
                # Fallback to local processing

        # Fallback: local processing
        print("Using local reset points...")

        # Parse original image state
        try:
            state_data = json.loads(original_img)
            video_path = state_data.get("video_path")
        except:
            video_path = None

        if video_path:
            # Re-extract original frame
            display_image = extract_first_frame(video_path)
            return display_image, []

        return None, []
    except Exception as e:
        print(f"Error in reset_points: {e}")
        return None, []

gr.set_static_paths(paths=[Path.cwd().absolute() / "_viz"])
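# Note: gr.set_static_paths marks the local "_viz" directory as servable static
# content, which is what allows the generated visualization HTML to be embedded
# through the iframe's "/gradio_api/file=..." URL below. The exact file route is
# version-dependent; treat it as an assumption of this particular Gradio setup.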

def launch_viz(grid_size, vo_points, fps, original_image_state):
    """Launch visualization with user-specific temp directory"""
    if original_image_state is None:
        return None, None, None

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print(f"🔧 Calling backend API with parameters: grid_size={grid_size}, vo_points={vo_points}, fps={fps}")
                print(f"🔧 Original image state type: {type(original_image_state)}")
                print(f"🔧 Original image state preview: {str(original_image_state)[:100]}...")

                # Validate and potentially fix the original_image_state format
                state_to_send = original_image_state

                # Check if this is a local processing state that needs to be converted
                try:
                    if isinstance(original_image_state, str):
                        parsed_state = json.loads(original_image_state)
                        if "video_path" in parsed_state and "frame" in parsed_state:
                            # This is a local processing state, we need to handle it differently
                            print("🔧 Detected local processing state, cannot use backend for tracking")
                            print("🔧 Backend requires proper video upload state from backend API")
                            # Fall through to local processing
                            raise ValueError("Local state cannot be processed by backend")
                except json.JSONDecodeError:
                    print("🔧 Invalid JSON state, cannot send to backend")
                    raise ValueError("Invalid state format")

                # Call the unified API with run_tracker function type
                result = backend_client.predict(
                    "run_tracker",     # function_type
                    None,              # video file (not used for run_tracker)
                    state_to_send,     # original_image_state
                    [],                # selected_points (not used for run_tracker)
                    "positive_point",  # point_type (not used for run_tracker)
                    0,                 # point_x (not used for run_tracker)
                    0,                 # point_y (not used for run_tracker)
                    grid_size,         # grid_size
                    vo_points,         # vo_points
                    fps,               # fps
                    api_name="/unified_api"
                )

                print("✅ Backend API call successful!")
                print(f"🔧 Result type: {type(result)}")
                print(f"🔧 Result: {result}")

                # Parse the result
                if isinstance(result, dict) and result.get("success"):
                    viz_html = result.get("viz_html", "")
                    track_video_path = result.get("track_video_path", "")
                    track_video_content = result.get("track_video_content", None)
                    track_video_filename = result.get("track_video_filename", "tracked_video.mp4")

                    # Save HTML to _viz directory (like local version)
                    viz_dir = './_viz'
                    os.makedirs(viz_dir, exist_ok=True)
                    random_path = f'./_viz/_{time.time()}.html'
                    with open(random_path, 'w', encoding='utf-8') as f:
                        f.write(viz_html)

                    # Create iframe HTML to display the saved file
                    iframe_html = f"""
                    <div style='border: 3px solid #667eea; border-radius: 10px; overflow: hidden; box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);'>
                        <iframe id="viz_iframe" src="/gradio_api/file={random_path}" width="100%" height="950px" style="border:none;"></iframe>
                    </div>
                    """

                    print(f"💾 HTML saved to: {random_path}")
                    print(f"📊 HTML content preview: {viz_html[:200]}...")

                    # If we have base64 encoded video content, save it as a temporary file
                    local_video_path = None
                    if track_video_content:
                        try:
                            # Create a temporary file for the video
                            temp_video_dir = "temp_frontend_videos"
                            os.makedirs(temp_video_dir, exist_ok=True)

                            # Generate unique filename to avoid conflicts
                            timestamp = str(int(time.time() * 1000))
                            local_video_path = os.path.join(temp_video_dir, f"{timestamp}_{track_video_filename}")

                            # Decode base64 and save as video file
                            video_bytes = base64.b64decode(track_video_content)
                            with open(local_video_path, 'wb') as f:
                                f.write(video_bytes)

                            print(f"✅ Successfully saved tracking video to: {local_video_path}")
                            print(f"🔧 Video file size: {len(video_bytes)} bytes")
                        except Exception as e:
                            print(f"❌ Failed to process tracking video: {e}")
                            local_video_path = None
                    else:
                        print("⚠️ No tracking video content received from backend")

                    # Return the iframe HTML, the video path, and the HTML file path (for download)
                    return iframe_html, local_video_path, random_path
                else:
                    error_msg = result.get("error", "Unknown error") if isinstance(result, dict) else "Backend processing failed"
                    print(f"❌ Backend processing failed: {error_msg}")
                    # Fall through to error message
            except Exception as e:
                print(f"❌ Backend API call failed: {e}")
                print(f"🔧 Error type: {type(e)}")
                print(f"🔧 Error details: {str(e)}")

                # Check for specific gradio_client errors
                if "AppError" in str(type(e)):
                    print("🔧 Backend Space has internal errors (AppError)")
                    print("🔧 The backend Space code has bugs or configuration issues")
                    print("🔧 Contact the Space owner to fix the backend implementation")
                elif "Could not fetch config" in str(e):
                    print("🔧 Config fetch failed - possible Gradio version mismatch")
                    print("🔧 Frontend and backend may be using incompatible Gradio versions")
                elif "timeout" in str(e).lower():
                    print("🔧 Backend request timed out - Space might be overloaded")
                elif "Expecting value" in str(e):
                    print("🔧 JSON parsing error in backend - state format mismatch")
                    print("🔧 This happens when using local processing state with backend API")
                    print("🔧 Please upload video again to use backend processing")
                else:
                    print(f"🔧 Unexpected error type: {type(e).__name__}")
                    print("🔄 Showing error message instead of visualization...")
                # Fall through to error message

        # Create an informative error message based on the state
        state_info = ""
        try:
            if isinstance(original_image_state, str):
                parsed_state = json.loads(original_image_state)
                if "video_path" in parsed_state:
                    video_name = os.path.basename(parsed_state["video_path"])
                    state_info = f"Video: {video_name}"
        except:
            state_info = "State format unknown"

        # Fallback: show a message that backend processing is required
        error_message = f"""
        <div style='border: 3px solid #ff6b6b; border-radius: 10px; padding: 20px; background-color: #fff5f5;'>
            <h3 style='color: #d63031; margin-bottom: 15px;'>⚠️ Backend Processing Required</h3>
            <p style='color: #2d3436; line-height: 1.6;'>
                The tracking and visualization features require backend processing. The current setup is using local processing, which is incompatible with the backend API.
            </p>
            <h4 style='color: #d63031; margin: 15px 0 10px 0;'>Solutions:</h4>
            <ul style='color: #2d3436; line-height: 1.6;'>
                <li><strong>Upload video again:</strong> This will properly initialize the backend state</li>
                <li><strong>Select points on the frame:</strong> Ensure you've clicked on the object to track</li>
                <li><strong>Check backend connection:</strong> Ensure the backend Space is running</li>
                <li><strong>Use compatible state:</strong> Avoid local processing mode</li>
            </ul>
            <div style='background-color: #f8f9fa; border-radius: 5px; padding: 10px; margin-top: 15px;'>
                <p style='color: #2d3436; font-weight: bold; margin: 0 0 5px 0;'>Debug Information:</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Backend Available: {BACKEND_AVAILABLE}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Backend Client: {backend_client is not None}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Backend URL: {BACKEND_SPACE_URL}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>State Info: {state_info}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Processing Mode: {'Backend' if BACKEND_AVAILABLE else 'Local (Limited)'}</p>
            </div>
            <div style='background-color: #e3f2fd; border-radius: 5px; padding: 10px; margin-top: 10px; border-left: 4px solid #2196f3;'>
                <p style='color: #1976d2; font-weight: bold; margin: 0 0 5px 0;'>💡 Quick Fix:</p>
                <p style='color: #1976d2; font-size: 13px; margin: 0;'>
                    Try uploading your video again - this should properly initialize the backend state for tracking.
                </p>
            </div>
        </div>
        """
        return error_message, None, None
    except Exception as e:
        print(f"Error in launch_viz: {e}")
        return None, None, None

def clear_all():
    """Clear all buffers and temporary files"""
    return (None, None, [],
            gr.update(value=50),
            gr.update(value=756),
            gr.update(value=3))

def clear_all_with_download():
    """Clear all buffers including download component"""
    return (None, None, [],
            gr.update(value=50),
            gr.update(value=756),
            gr.update(value=3),
            None)  # HTML download component

def update_tracker_model(model_name):
    """Update tracker model (placeholder function)"""
    return

def get_video_settings(video_name):
    """Get video-specific settings based on video name"""
    video_settings = {
        "kiss": (45, 700, 10),
        "backpack": (40, 600, 2),
        "kitchen": (60, 800, 3),
        "pillow": (35, 500, 2),
        "handwave": (35, 500, 8),
        "hockey": (45, 700, 2),
        "drifting": (35, 1000, 6),
        "basketball": (45, 1500, 5),
        "ken_block_0": (45, 700, 2),
        "ego_kc1": (45, 500, 4),
        "vertical_place": (45, 500, 3),
        "ego_teaser": (45, 1200, 10),
        "robot_unitree": (45, 500, 4),
        "robot_3": (35, 400, 5),
        "teleop2": (45, 256, 7),
        "pusht": (45, 256, 10),
        "cinema_0": (45, 356, 5),
        "cinema_1": (45, 756, 3),
    }
    return video_settings.get(video_name, (50, 756, 3))
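
# Illustrative lookups: settings are keyed by the uploaded file's basename (via
# get_video_name), so "examples/kiss.mp4" resolves to the "kiss" preset and any
# unknown name falls back to the defaults (grid_size=50, vo_points=756, fps=3).
#
#   get_video_settings("kiss")      # -> (45, 700, 10)
#   get_video_settings("my_video")  # -> (50, 756, 3)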

def test_backend_connection():
    """Test if backend is actually working"""
    global BACKEND_AVAILABLE
    if not backend_client:
        return False

    try:
        print("Testing backend connection with a simple call...")
        # Check if we have fns available
        if hasattr(backend_client, 'fns') and backend_client.fns:
            print("✅ Backend API functions are available")
            print(f"🔧 Available function indices: {list(backend_client.fns.keys())}")
            return True
        else:
            print("❌ Backend API functions not found")
            return False
    except Exception as e:
        print(f"❌ Backend connection test failed: {e}")
        return False

def test_backend_api():
    """Test specific backend API functions"""
    if not BACKEND_AVAILABLE or not backend_client:
        print("❌ Backend not available for testing")
        return False

    try:
        print("🧪 Testing backend API functions...")
        # Test if fns exist and show available indices
        if hasattr(backend_client, 'fns') and backend_client.fns:
            print(f"✅ Backend has {len(backend_client.fns)} functions available")
            for idx in backend_client.fns.keys():
                print(f"✅ Function {idx} is available")
        else:
            print("❌ No functions found in backend API")
            return False
        return True
    except Exception as e:
        print(f"❌ Backend API test failed: {e}")
        return False

# Initialize the backend connection
print("🚀 Initializing frontend application...")
result = initialize_backend()

# Test backend connection if available
if result and BACKEND_AVAILABLE:
    print("✅ Backend connection successful!")
else:
    print("❌ Backend connection failed!")

# Create the Gradio interface
print("🎨 Creating Gradio interface...")

with gr.Blocks(
    theme=gr.themes.Soft(),
    title="SpatialTracker V2 - Frontend",
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    .gr-button {
        margin: 5px;
    }
    .gr-form {
        background: white;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
    }
    /* Fix the height of the video upload component */
    .gr-video {
        height: 300px !important;
        min-height: 300px !important;
        max-height: 300px !important;
    }
    .gr-video video {
        height: 260px !important;
        max-height: 260px !important;
        object-fit: contain !important;
        background: #f8f9fa;
    }
    .gr-video .gr-video-player {
        height: 260px !important;
        max-height: 260px !important;
    }
    /* Horizontally scrolling example video styles */
    .example-videos .gr-examples {
        overflow: visible !important;
    }
    .example-videos .gr-examples .gr-table-wrapper {
        overflow-x: auto !important;
        overflow-y: hidden !important;
        scrollbar-width: thin;
        scrollbar-color: #667eea #f1f1f1;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar {
        height: 8px;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar-track {
        background: #f1f1f1;
        border-radius: 4px;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 4px;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb:hover {
        background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
    }
    .example-videos .gr-examples .gr-table {
        display: flex !important;
        flex-wrap: nowrap !important;
        min-width: max-content !important;
        gap: 10px !important;
    }
    .example-videos .gr-examples .gr-table tbody {
        display: flex !important;
        flex-direction: row !important;
        flex-wrap: nowrap !important;
        gap: 10px !important;
    }
    .example-videos .gr-examples .gr-table tbody tr {
        display: flex !important;
        flex-direction: column !important;
        min-width: 120px !important;
        max-width: 120px !important;
        margin: 0 !important;
        background: white;
        border-radius: 8px;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        transition: all 0.3s ease;
        cursor: pointer;
    }
    .example-videos .gr-examples .gr-table tbody tr:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(102, 126, 234, 0.2);
    }
    .example-videos .gr-examples .gr-table tbody tr td {
        text-align: center !important;
        padding: 8px !important;
        border: none !important;
    }
    .example-videos .gr-examples .gr-table tbody tr td video {
        border-radius: 6px !important;
        width: 100% !important;
        height: auto !important;
    }
    .example-videos .gr-examples .gr-table tbody tr td:last-child {
        font-size: 12px !important;
        font-weight: 500 !important;
        color: #333 !important;
        padding-top: 4px !important;
    }
    /* New horizontally scrolling example video styles */
    .horizontal-examples .gr-examples {
        overflow: visible !important;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper {
        overflow-x: auto !important;
        overflow-y: hidden !important;
        scrollbar-width: thin;
        scrollbar-color: #667eea #f1f1f1;
        padding: 10px 0;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar {
        height: 8px;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar-track {
        background: #f1f1f1;
        border-radius: 4px;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 4px;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb:hover {
        background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
    }
    .horizontal-examples .gr-examples .gr-table {
        display: flex !important;
        flex-wrap: nowrap !important;
        min-width: max-content !important;
        gap: 15px !important;
        padding-bottom: 10px;
    }
    .horizontal-examples .gr-examples .gr-table tbody {
        display: flex !important;
        flex-direction: row !important;
        flex-wrap: nowrap !important;
        gap: 15px !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr {
        display: flex !important;
        flex-direction: column !important;
        min-width: 160px !important;
        max-width: 160px !important;
        margin: 0 !important;
        background: white;
        border-radius: 12px;
        box-shadow: 0 3px 12px rgba(0,0,0,0.12);
        transition: all 0.3s ease;
        cursor: pointer;
        overflow: hidden;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr:hover {
        transform: translateY(-4px);
        box-shadow: 0 8px 20px rgba(102, 126, 234, 0.25);
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td {
        text-align: center !important;
        padding: 0 !important;
        border: none !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td:first-child {
        padding: 0 !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td video {
        border-radius: 8px 8px 0 0 !important;
        width: 100% !important;
        height: 90px !important;
        object-fit: cover !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td:last-child {
        font-size: 11px !important;
        font-weight: 600 !important;
        color: #333 !important;
        padding: 8px 12px !important;
        background: linear-gradient(135deg, #f8f9ff 0%, #e6f3ff 100%);
        border-radius: 0 0 8px 8px;
    }
    """
) as demo:
    gr.Markdown("""
# 🎯 SpatialTracker V2 - Frontend Interface

Welcome to SpatialTracker V2! This interface allows you to track any pixels in 3D using our model.

**Instructions:**
1. Upload a video file or select from the examples below
2. Click on the object you want to track in the first frame
3. Adjust the tracking parameters if needed
4. Click "Launch Visualization" to start tracking
    """)

    # Status indicator with more detailed information
    if BACKEND_AVAILABLE:
        status_text = "🟢 Backend Connected"
        status_details = f"Connected to: {BACKEND_SPACE_URL}"
    else:
        status_text = "🟡 Running in Standalone Mode"
        status_details = f"Backend unavailable: {BACKEND_SPACE_URL}"

    gr.Markdown(f"**Status:** {status_text}")
    gr.Markdown(f"<small style='color: #666;'>{status_details}</small>", elem_id="status-details")

    # Example videos section - moved to top
    with gr.Group(elem_classes=["example-videos"]):
        gr.Markdown("### 📂 Example Videos")
        gr.Markdown("Try these example videos to get started quickly:")

        # Custom horizontal scrolling video gallery
        gr.HTML("""
        <div style='background-color: #f8f9ff; border-radius: 8px; padding: 10px; margin: 10px 0; border-left: 4px solid #667eea;'>
            <p style='margin: 0; font-size: 13px; color: #666; display: flex; align-items: center; gap: 8px;'>
                <span style='font-size: 16px;'>💡</span>
                <strong>Tip:</strong> Scroll horizontally below to see all example videos
            </p>
        </div>
        """)

        # Define video_input here so it can be referenced in examples
        video_input = gr.Video(
            label="Upload Video or Select Example",
            format="mp4",
            height=300
        )

        # Create a horizontal scrolling container for the examples
        with gr.Group(elem_classes=["horizontal-examples"]):
            gr.Examples(
                examples=[
                    ["examples/kiss.mp4"],
                    ["examples/backpack.mp4"],
                    ["examples/pillow.mp4"],
                    ["examples/handwave.mp4"],
                    ["examples/hockey.mp4"],
                    ["examples/drifting.mp4"],
                    ["examples/ken_block_0.mp4"],
                    ["examples/kitchen.mp4"],
                    ["examples/basketball.mp4"],
                    ["examples/ego_kc1.mp4"],
                    ["examples/vertical_place.mp4"],
                    ["examples/ego_teaser.mp4"],
                    ["examples/robot_unitree.mp4"],
                    ["examples/robot_3.mp4"],
                    ["examples/teleop2.mp4"],
                    ["examples/pusht.mp4"],
                    ["examples/cinema_0.mp4"],
                    ["examples/cinema_1.mp4"],
                ],
                inputs=video_input,
                label="🎬 Click on any example to load it",
                examples_per_page=16
            )

    with gr.Row():
        with gr.Column(scale=1):
            # Interactive frame display
            with gr.Group():
                gr.Markdown("### 🎯 Point Selection")
                gr.Markdown("Click on the object you want to track in the frame below:")

                interactive_frame = gr.Image(
                    label="Click to select tracking points",
                    type="numpy",
                    interactive=True
                )

                with gr.Row():
                    point_type = gr.Radio(
                        choices=["positive_point", "negative_point"],
                        value="positive_point",
                        label="Point Type",
                        info="Positive points indicate the object to track, negative points indicate areas to avoid"
                    )

                with gr.Row():
                    reset_points_btn = gr.Button("🔄 Reset Points", variant="secondary")
                    clear_all_btn = gr.Button("🗑️ Clear All", variant="stop")

        with gr.Column(scale=1):
            # Tracking results
            with gr.Group():
                gr.Markdown("### 🎬 Tracking Results")
                tracking_result_video = gr.Video(
                    label="Tracking Result Video",
                    interactive=False,
                    height=300
                )

                # HTML file download component
                html_download = gr.File(
                    label="📥 Download 3D Visualization HTML",
                    interactive=False,
                    visible=True
                )

    # Advanced settings section - changed to open=True
    with gr.Accordion("⚙️ Advanced Settings", open=True):
        gr.Markdown("Adjust these parameters to optimize tracking performance:")
        with gr.Row():
            grid_size = gr.Slider(
                minimum=10,
                maximum=100,
                step=10,
                value=50,
                label="Grid Size",
                info="Size of the tracking grid (larger = more detailed)"
            )
            vo_points = gr.Slider(
                minimum=100,
                maximum=2000,
                step=50,
                value=756,
                label="VO Points",
                info="Number of visual odometry points (more = better accuracy)"
            )
            fps = gr.Slider(
                minimum=1,
                maximum=30,
                step=1,
                value=3,
                label="FPS",
                info="Frames per second for processing (higher = smoother but slower)"
            )

    # Launch button
    with gr.Row():
        launch_btn = gr.Button("🚀 Start Tracking Now!", variant="primary", size="lg")

    # 3D visualization - make it larger and more prominent
    with gr.Row():
        with gr.Column():
            with gr.Group():
                gr.Markdown("### 🌐 3D Trajectory Visualization")
                gr.Markdown("Interactive 3D visualization of point tracking and camera motion:")
                viz_html = gr.HTML(
                    label="3D Trajectory Visualization",
                    value="""
                    <div style='border: 3px solid #667eea; border-radius: 15px; padding: 40px;
                                background: linear-gradient(135deg, #f8f9ff 0%, #e6f3ff 100%);
                                text-align: center; min-height: 600px; display: flex;
                                flex-direction: column; justify-content: center; align-items: center;
                                box-shadow: 0 8px 32px rgba(102, 126, 234, 0.2);'>
                        <div style='font-size: 48px; margin-bottom: 20px;'>🌐</div>
                        <h2 style='color: #667eea; margin-bottom: 15px; font-size: 28px; font-weight: 600;'>
                            3D Trajectory Visualization
                        </h2>
                        <p style='color: #666; font-size: 16px; line-height: 1.6; max-width: 500px; margin-bottom: 25px;'>
                            Perceive the world with Pixel-wise 3D Motions!
                        </p>
                        <div style='background: rgba(102, 126, 234, 0.1); border-radius: 25px;
                                    padding: 12px 24px; border: 2px solid rgba(102, 126, 234, 0.2);'>
                            <span style='color: #667eea; font-weight: 600; font-size: 14px;'>
                                ⚡ Powered by SpatialTracker V2
                            </span>
                        </div>
                    </div>
                    """,
                    elem_id="viz_container"
                )

    # Hidden state variables
    original_image_state = gr.State(None)
    selected_points = gr.State([])

    # Event handlers
    video_input.change(
        fn=handle_video_upload,
        inputs=[video_input],
        outputs=[original_image_state, interactive_frame, selected_points, grid_size, vo_points, fps]
    )

    interactive_frame.select(
        fn=select_point,
        inputs=[original_image_state, selected_points, point_type],
        outputs=[interactive_frame, selected_points]
    )

    reset_points_btn.click(
        fn=reset_points,
        inputs=[original_image_state, selected_points],
        outputs=[interactive_frame, selected_points]
    )

    clear_all_btn.click(
        fn=clear_all_with_download,
        outputs=[video_input, interactive_frame, selected_points, grid_size, vo_points, fps, html_download]
    )

    launch_btn.click(
        fn=launch_viz,
        inputs=[grid_size, vo_points, fps, original_image_state],
        outputs=[viz_html, tracking_result_video, html_download]
    )

    # GitHub star reminder
    gr.HTML("""
    <div style='background: linear-gradient(135deg, #e8eaff 0%, #f0f2ff 100%);
                border-radius: 10px;
                padding: 15px;
                margin: 15px 0;
                box-shadow: 0 2px 8px rgba(102, 126, 234, 0.1);
                border: 1px solid rgba(102, 126, 234, 0.15);'>
        <div style='text-align: center; color: #4a5568;'>
            <h3 style='margin: 0 0 10px 0; font-size: 18px; text-shadow: none; color: #2d3748;'>
                ⭐ Love SpatialTracker? Give us a Star! ⭐
            </h3>
            <p style='margin: 0 0 12px 0; font-size: 14px; opacity: 0.8; color: #4a5568;'>
                Help us grow by starring our repository on GitHub! 🚀
            </p>
            <div style='display: flex; justify-content: center;'>
                <a href="https://github.com/henry123-boy/SpaTrackerV2"
                   target="_blank"
                   style='display: inline-flex;
                          align-items: center;
                          gap: 6px;
                          background: rgba(102, 126, 234, 0.1);
                          color: #4a5568;
                          padding: 8px 16px;
                          border-radius: 20px;
                          text-decoration: none;
                          font-weight: bold;
                          font-size: 14px;
                          backdrop-filter: blur(5px);
                          border: 1px solid rgba(102, 126, 234, 0.2);
                          transition: all 0.3s ease;'
                   onmouseover="this.style.background='rgba(102, 126, 234, 0.15)'; this.style.transform='translateY(-1px)'"
                   onmouseout="this.style.background='rgba(102, 126, 234, 0.1)'; this.style.transform='translateY(0)'">
                    <span style='font-size: 16px;'>⭐</span>
                    Star on GitHub
                </a>
            </div>
        </div>
    </div>
    """)

    # Acknowledgment section for TAPIP3D - placed at the end
    gr.HTML("""
    <div style='background: linear-gradient(135deg, #fff8e1 0%, #fffbf0 100%);
                border-radius: 8px;
                padding: 12px;
                margin: 15px 0;
                box-shadow: 0 1px 4px rgba(255, 193, 7, 0.1);
                border: 1px solid rgba(255, 193, 7, 0.2);'>
        <div style='text-align: center; color: #5d4037;'>
            <h5 style='margin: 0 0 6px 0; font-size: 14px; color: #5d4037;'>
                Acknowledgments
            </h5>
            <p style='margin: 0; font-size: 12px; opacity: 0.9; color: #5d4037; line-height: 1.3;'>
                Our 3D visualizer is adapted from <strong>TAPIP3D</strong>. We thank the authors for their excellent work!
            </p>
            <div style='margin-top: 6px;'>
                <a href="https://github.com/zbw001/TAPIP3D"
                   target="_blank"
                   style='display: inline-flex;
                          align-items: center;
                          gap: 3px;
                          background: rgba(255, 193, 7, 0.15);
                          color: #5d4037;
                          padding: 3px 10px;
                          border-radius: 12px;
                          text-decoration: none;
                          font-weight: 500;
                          font-size: 11px;
                          border: 1px solid rgba(255, 193, 7, 0.3);
                          transition: all 0.3s ease;'
                   onmouseover="this.style.background='rgba(255, 193, 7, 0.2)'"
                   onmouseout="this.style.background='rgba(255, 193, 7, 0.15)'">
                    📚 TAPIP3D Repository
                </a>
            </div>
        </div>
    </div>
    """)

# Launch the interface
if __name__ == "__main__":
    print("🌟 Launching SpatialTracker V2 Frontend...")
    print(f"🔗 Backend Status: {'Connected' if BACKEND_AVAILABLE else 'Disconnected'}")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True,
        show_error=True
    )