import gradio as gr
import os
import json
import numpy as np
import cv2
import base64
import requests
import time
from typing import List, Tuple
from gradio_client.utils import handle_file
from pathlib import Path

# Backend Space URL - replace with your actual backend space URL
BACKEND_SPACE_URL = "Yuxihenry/SpatialTrackerV2_Backend"  # Replace with actual backend space URL
hf_token = os.getenv("HF_TOKEN")  # Read from the environment; do not hard-code your token here

# Debug information
print("🔧 Environment Debug Info:")
print(f"  - Backend URL: {BACKEND_SPACE_URL}")
print(f"  - HF Token available: {'Yes' if hf_token else 'No'}")
print(f"  - HF Token length: {len(hf_token) if hf_token else 0}")

# Flags to track whether the backend is available
BACKEND_AVAILABLE = False
backend_client = None
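
# Calling convention used throughout this file: the backend Space exposes a single
# "/unified_api" endpoint that multiplexes several operations. Every predict() call
# below passes the same positional signature:
#   (function_type, video, original_image_state, selected_points, point_type,
#    point_x, point_y, grid_size, vo_points, fps)
# with function_type one of "upload_video", "select_point", "reset_points", or
# "run_tracker"; arguments a given operation does not need are filled with placeholders.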

def check_user_permissions():
    """Check if user has necessary permissions"""
    print("🔐 Checking user permissions...")

    if not hf_token:
        print("❌ No HF Token found")
        print("🔧 To get a token:")
        print("   1. Go to https://huggingface.co/settings/tokens")
        print("   2. Create a new token with 'read' permissions")
        print("   3. Set it as environment variable: export HF_TOKEN='your_token'")
        return False

    # Try to access user info
    try:
        headers = {'Authorization': f'Bearer {hf_token}'}
        response = requests.get('https://huggingface.co/api/whoami', headers=headers, timeout=5)

        if response.status_code == 200:
            user_info = response.json()
            username = user_info.get('name', 'Unknown')
            print(f"✅ Authenticated as: {username}")

            # Check if user has access to the specific space
            space_url = f"https://huggingface.co/api/spaces/{BACKEND_SPACE_URL}"
            space_response = requests.get(space_url, headers=headers, timeout=5)

            if space_response.status_code == 200:
                print("✅ You have access to the backend Space")
                return True
            elif space_response.status_code == 401:
                print("❌ You don't have access to the backend Space")
                print("🔧 Solutions:")
                print("   1. Contact the Space owner to add you as a collaborator")
                print("   2. Ask the owner to make the Space public")
                return False
            elif space_response.status_code == 404:
                print("❌ Backend Space not found")
                print("🔧 Please check if the Space URL is correct")
                return False
            else:
                print(f"⚠️ Unexpected response checking Space access: {space_response.status_code}")
                return False
        else:
            print(f"❌ Token validation failed: {response.status_code}")
            print("🔧 Your token might be invalid or expired")
            return False
    except Exception as e:
        print(f"❌ Error checking permissions: {e}")
        return False

def check_backend_space_status():
    """Check if backend space is running via HTTP request"""
    try:
        backend_url = f"https://huggingface.co/spaces/{BACKEND_SPACE_URL}"
        print(f"🔍 Checking backend space status: {backend_url}")

        # Prepare headers with authentication if token is available
        headers = {}
        if hf_token:
            headers['Authorization'] = f'Bearer {hf_token}'
            print("🔐 Using HF Token for authentication")

        # Try to access the space page
        response = requests.get(backend_url, headers=headers, timeout=10)

        if response.status_code == 200:
            print("✅ Backend space page is accessible")

            # Check if space is running (look for common indicators)
            page_content = response.text.lower()
            if "runtime error" in page_content:
                print("❌ Backend space has a runtime error")
                return False
            elif "building" in page_content:
                print("🔄 Backend space is building...")
                return False
            elif "sleeping" in page_content:
                print("😴 Backend space is sleeping")
                return False
            else:
                print("✅ Backend space appears to be running")
                return True
        elif response.status_code == 401:
            print("❌ Authentication failed (HTTP 401)")
            print("🔧 This means:")
            print("   - The backend Space is private")
            print("   - Your HF Token doesn't have access to this Space")
            print("   - You need to be added as a collaborator to the Space")
            print("   - Or the Space owner needs to make it public")
            return False
        elif response.status_code == 404:
            print("❌ Backend space not found (HTTP 404)")
            print("🔧 Please check if the Space URL is correct:")
            print(f"   Current URL: {BACKEND_SPACE_URL}")
            return False
        else:
            print(f"❌ Backend space not accessible (HTTP {response.status_code})")
            print(f"🔧 Response: {response.text[:200]}...")
            return False
    except requests.RequestException as e:
        print(f"❌ Failed to check backend space status: {e}")
        return False
    except Exception as e:
        print(f"❌ Unexpected error checking backend: {e}")
        return False

def initialize_backend():
    """Initialize backend connection using gradio_client"""
    global backend_client, BACKEND_AVAILABLE
    try:
        from gradio_client import Client

        # Connect to HF Space
        if hf_token:
            backend_client = Client(BACKEND_SPACE_URL, hf_token=hf_token)
        else:
            backend_client = Client(BACKEND_SPACE_URL)

        # Test the connection
        backend_client.view_api()
        BACKEND_AVAILABLE = True
        return True
    except Exception as e:
        print(f"❌ Backend connection failed: {e}")
        BACKEND_AVAILABLE = False
        return False

def numpy_to_base64(arr):
    """Convert numpy array to base64 string"""
    return base64.b64encode(arr.tobytes()).decode('utf-8')

def base64_to_numpy(b64_str, shape, dtype):
    """Convert base64 string back to numpy array"""
    return np.frombuffer(base64.b64decode(b64_str), dtype=dtype).reshape(shape)
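
# Illustrative round-trip (not executed here): the base64 payload carries only raw
# bytes, so the caller must also pass the array's shape and dtype back in, exactly
# as the state dict built in handle_video_upload does.
#
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   payload = numpy_to_base64(frame)
#   restored = base64_to_numpy(payload, frame.shape, str(frame.dtype))
#   assert np.array_equal(frame, restored)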

def base64_to_image(b64_str):
    """Convert base64 string to numpy image array"""
    if not b64_str:
        return None
    try:
        # Decode base64 to bytes
        img_bytes = base64.b64decode(b64_str)
        # Convert bytes to numpy array
        nparr = np.frombuffer(img_bytes, np.uint8)
        # Decode image
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode returns None when the buffer is not a valid image
            return None
        # Convert BGR to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img
    except Exception as e:
        print(f"Error converting base64 to image: {e}")
        return None

def get_video_name(video_path):
    """Extract video name without extension"""
    return os.path.splitext(os.path.basename(video_path))[0]

def extract_first_frame(video_path):
    """Extract first frame from video file"""
    try:
        cap = cv2.VideoCapture(video_path)
        ret, frame = cap.read()
        cap.release()
        if ret:
            # Convert BGR to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            return frame_rgb
        else:
            return None
    except Exception as e:
        print(f"Error extracting first frame: {e}")
        return None

def handle_video_upload(video):
    """Handle video upload and extract first frame"""
    if video is None:
        return (None, None, [],
                gr.update(value=50),
                gr.update(value=756),
                gr.update(value=3))

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print("🔧 Calling backend API for video upload...")
                # Call the unified API with upload_video function type - fix: use handle_file wrapper
                result = backend_client.predict(
                    "upload_video",      # function_type
                    handle_file(video),  # video file - wrapped with handle_file
                    "",                  # original_image_state (not used for upload)
                    [],                  # selected_points (not used for upload)
                    "positive_point",    # point_type (not used for upload)
                    0,                   # point_x (not used for upload)
                    0,                   # point_y (not used for upload)
                    50,                  # grid_size (not used for upload)
                    756,                 # vo_points (not used for upload)
                    3,                   # fps (not used for upload)
                    api_name="/unified_api"
                )

                print("✅ Backend video upload API call successful!")
                print(f"🔧 Result type: {type(result)}")
                print(f"🔧 Result: {result}")

                # Parse the result - expect a dict with success status
                if isinstance(result, dict) and result.get("success"):
                    # Extract data from backend response
                    original_image_state = result.get("original_image_state", "")
                    display_image = result.get("display_image", None)
                    selected_points = result.get("selected_points", [])

                    # Fix: Convert display_image from list back to numpy array if needed
                    if isinstance(display_image, list):
                        display_image = np.array(display_image, dtype=np.uint8)
                        print(f"🔧 Converted display_image from list to numpy array: {display_image.shape}")

                    # Get video settings based on video name
                    video_name = get_video_name(video)
                    print(f"🎬 Video path: '{video}' -> Video name: '{video_name}'")
                    grid_size_val, vo_points_val, fps_val = get_video_settings(video_name)
                    print(f"🎬 Video settings for '{video_name}': grid_size={grid_size_val}, vo_points={vo_points_val}, fps={fps_val}")

                    return (original_image_state, display_image, selected_points,
                            gr.update(value=grid_size_val),
                            gr.update(value=vo_points_val),
                            gr.update(value=fps_val))
                else:
                    print("Backend processing failed, using local fallback")
                    # Fallback to local processing
            except Exception as e:
                print(f"Backend API call failed: {e}")
                # Fallback to local processing

        # Fallback: local processing
        print("Using local video processing...")
        display_image = extract_first_frame(video)

        if display_image is not None:
            # Create a state format compatible with backend
            import tempfile
            import shutil

            # Create a temporary directory for this session
            session_id = str(int(time.time() * 1000))  # Use timestamp as session ID
            temp_dir = os.path.join("temp_frontend", f"session_{session_id}")
            os.makedirs(temp_dir, exist_ok=True)

            # Copy video to temp directory with standardized name
            video_name = get_video_name(video)
            temp_video_path = os.path.join(temp_dir, f"{video_name}.mp4")
            shutil.copy(video, temp_video_path)

            # Create state format compatible with backend
            frame_data = {
                'data': numpy_to_base64(display_image),
                'shape': display_image.shape,
                'dtype': str(display_image.dtype),
                'temp_dir': temp_dir,
                'video_name': video_name,
                'video_path': temp_video_path  # Keep for backward compatibility
            }
            original_image_state = json.dumps(frame_data)
        else:
            # Fallback to simple state if frame extraction fails
            original_image_state = json.dumps({
                "video_path": video,
                "frame": "local_processing_failed"
            })

        # Get video settings
        video_name = get_video_name(video)
        print(f"🎬 Local fallback - Video path: '{video}' -> Video name: '{video_name}'")
        grid_size_val, vo_points_val, fps_val = get_video_settings(video_name)
        print(f"🎬 Local fallback - Video settings for '{video_name}': grid_size={grid_size_val}, vo_points={vo_points_val}, fps={fps_val}")

        return (original_image_state, display_image, [],
                gr.update(value=grid_size_val),
                gr.update(value=vo_points_val),
                gr.update(value=fps_val))
    except Exception as e:
        print(f"Error in handle_video_upload: {e}")
        return (None, None, [],
                gr.update(value=50),
                gr.update(value=756),
                gr.update(value=3))

def select_point(original_img: str, sel_pix: list, point_type: str, evt: gr.SelectData):
    """Handle point selection for SAM"""
    if original_img is None:
        return None, []

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print(f"🔧 Calling backend select point API: x={evt.index[0]}, y={evt.index[1]}, type={point_type}")
                # Call the unified API with select_point function type
                result = backend_client.predict(
                    "select_point",    # function_type
                    None,              # video file (not used for select_point)
                    original_img,      # original_image_state
                    sel_pix,           # selected_points
                    point_type,        # point_type
                    evt.index[0],      # point_x
                    evt.index[1],      # point_y
                    50,                # grid_size (not used for select_point)
                    756,               # vo_points (not used for select_point)
                    3,                 # fps (not used for select_point)
                    api_name="/unified_api"
                )

                print("✅ Backend select point API call successful!")
                print(f"🔧 Result type: {type(result)}")
                print(f"🔧 Result: {result}")

                # Parse the result - expect a dict with success status
                if isinstance(result, dict) and result.get("success"):
                    display_image = result.get("display_image", None)
                    new_sel_pix = result.get("selected_points", sel_pix)

                    # Fix: Convert display_image from list back to numpy array if needed
                    if isinstance(display_image, list):
                        display_image = np.array(display_image, dtype=np.uint8)
                        print(f"🔧 Converted display_image from list to numpy array: {display_image.shape}")

                    return display_image, new_sel_pix
                else:
                    print("Backend processing failed, using local fallback")
                    # Fallback to local processing
            except Exception as e:
                print(f"Backend API call failed: {e}")
                # Check for specific gradio_client errors
                if "AppError" in str(type(e)):
                    print("🔧 Backend Space has internal errors (AppError)")
                    print("🔧 The backend Space code has bugs or configuration issues")
                    print("🔧 Contact the Space owner to fix the backend implementation")
                elif "Could not fetch config" in str(e):
                    print("🔧 Config fetch failed - possible Gradio version mismatch")
                    print("🔧 Frontend and backend may be using incompatible Gradio versions")
                elif "timeout" in str(e).lower():
                    print("🔧 Backend request timed out - Space might be overloaded")
                else:
                    print(f"🔧 Unexpected error type: {type(e).__name__}")
                    print("🔄 Showing error message instead of visualization...")
                # Fallback to local processing

        # Fallback: local processing with improved visualization
        print("Using local point selection with enhanced visualization...")

        # Parse original image state
        try:
            state_data = json.loads(original_img)
            video_path = state_data.get("video_path")
        except:
            video_path = None

        if video_path:
            # Re-extract frame and add point with mask visualization
            display_image = extract_first_frame(video_path)
            if display_image is not None:
                # Add point to the image with enhanced visualization
                x, y = evt.index[0], evt.index[1]
                color = (0, 255, 0) if point_type == 'positive_point' else (255, 0, 0)

                # Draw a larger, more visible point
                cv2.circle(display_image, (x, y), 8, color, -1)
                cv2.circle(display_image, (x, y), 12, (255, 255, 255), 2)

                # Add point to selected points list - fix logic to match local version
                new_sel_pix = sel_pix.copy() if sel_pix else []
                new_sel_pix.append([x, y, point_type])

                return display_image, new_sel_pix

        return None, []
    except Exception as e:
        print(f"Error in select_point: {e}")
        return None, []

def reset_points(original_img: str, sel_pix):
    """Reset points and restore original image"""
    if original_img is None:
        return None, []

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print("🔧 Calling backend reset points API...")
                # Call the unified API with reset_points function type
                result = backend_client.predict(
                    "reset_points",    # function_type
                    None,              # video file (not used for reset_points)
                    original_img,      # original_image_state
                    sel_pix,           # selected_points
                    "positive_point",  # point_type (not used for reset_points)
                    0,                 # point_x (not used for reset_points)
                    0,                 # point_y (not used for reset_points)
                    50,                # grid_size (not used for reset_points)
                    756,               # vo_points (not used for reset_points)
                    3,                 # fps (not used for reset_points)
                    api_name="/unified_api"
                )

                print("✅ Backend reset points API call successful!")
                print(f"🔧 Result: {result}")

                # Parse the result
                if isinstance(result, dict) and result.get("success"):
                    display_image = result.get("display_image", None)
                    new_sel_pix = result.get("selected_points", [])

                    # Fix: Convert display_image from list back to numpy array if needed
                    if isinstance(display_image, list):
                        display_image = np.array(display_image, dtype=np.uint8)
                        print(f"🔧 Converted display_image from list to numpy array: {display_image.shape}")

                    return display_image, new_sel_pix
                else:
                    print("Backend processing failed, using local fallback")
                    # Fallback to local processing
            except Exception as e:
                print(f"Backend API call failed: {e}")
                # Fallback to local processing

        # Fallback: local processing
        print("Using local reset points...")

        # Parse original image state
        try:
            state_data = json.loads(original_img)
            video_path = state_data.get("video_path")
        except:
            video_path = None

        if video_path:
            # Re-extract original frame
            display_image = extract_first_frame(video_path)
            return display_image, []

        return None, []
    except Exception as e:
        print(f"Error in reset_points: {e}")
        return None, []

gr.set_static_paths(paths=[Path.cwd().absolute() / "_viz"])
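# Note: gr.set_static_paths marks the local "_viz" directory as servable static
# content, which is what allows the generated visualization HTML to be embedded
# through the iframe's "/gradio_api/file=..." URL below. The exact file route is
# version-dependent; treat it as an assumption of this particular Gradio setup.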

def launch_viz(grid_size, vo_points, fps, original_image_state):
    """Launch visualization with user-specific temp directory"""
    if original_image_state is None:
        return None, None, None

    try:
        if BACKEND_AVAILABLE and backend_client:
            # Try to use backend API
            try:
                print(f"🔧 Calling backend API with parameters: grid_size={grid_size}, vo_points={vo_points}, fps={fps}")
                print(f"🔧 Original image state type: {type(original_image_state)}")
                print(f"🔧 Original image state preview: {str(original_image_state)[:100]}...")

                # Validate and potentially fix the original_image_state format
                state_to_send = original_image_state

                # Check if this is a local processing state that needs to be converted
                try:
                    if isinstance(original_image_state, str):
                        parsed_state = json.loads(original_image_state)
                        if "video_path" in parsed_state and "frame" in parsed_state:
                            # This is a local processing state, we need to handle it differently
                            print("🔧 Detected local processing state, cannot use backend for tracking")
                            print("🔧 Backend requires proper video upload state from backend API")
                            # Fall through to local processing
                            raise ValueError("Local state cannot be processed by backend")
                except json.JSONDecodeError:
                    print("🔧 Invalid JSON state, cannot send to backend")
                    raise ValueError("Invalid state format")

                # Call the unified API with run_tracker function type
                result = backend_client.predict(
                    "run_tracker",     # function_type
                    None,              # video file (not used for run_tracker)
                    state_to_send,     # original_image_state
                    [],                # selected_points (not used for run_tracker)
                    "positive_point",  # point_type (not used for run_tracker)
                    0,                 # point_x (not used for run_tracker)
                    0,                 # point_y (not used for run_tracker)
                    grid_size,         # grid_size
                    vo_points,         # vo_points
                    fps,               # fps
                    api_name="/unified_api"
                )

                print("✅ Backend API call successful!")
                print(f"🔧 Result type: {type(result)}")
                print(f"🔧 Result: {result}")

                # Parse the result
                if isinstance(result, dict) and result.get("success"):
                    viz_html = result.get("viz_html", "")
                    track_video_path = result.get("track_video_path", "")
                    track_video_content = result.get("track_video_content", None)
                    track_video_filename = result.get("track_video_filename", "tracked_video.mp4")

                    # Save HTML to _viz directory (like local version)
                    viz_dir = './_viz'
                    os.makedirs(viz_dir, exist_ok=True)
                    random_path = f'./_viz/_{time.time()}.html'
                    with open(random_path, 'w', encoding='utf-8') as f:
                        f.write(viz_html)

                    # Create iframe HTML to display the saved file
                    iframe_html = f"""
                    <div style='border: 3px solid #667eea; border-radius: 10px; overflow: hidden; box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);'>
                        <iframe id="viz_iframe" src="/gradio_api/file={random_path}" width="100%" height="950px" style="border:none;"></iframe>
                    </div>
                    """

                    print(f"💾 HTML saved to: {random_path}")
                    print(f"📊 HTML content preview: {viz_html[:200]}...")

                    # If we have base64 encoded video content, save it as a temporary file
                    local_video_path = None
                    if track_video_content:
                        try:
                            # Create a temporary file for the video
                            temp_video_dir = "temp_frontend_videos"
                            os.makedirs(temp_video_dir, exist_ok=True)

                            # Generate unique filename to avoid conflicts
                            timestamp = str(int(time.time() * 1000))
                            local_video_path = os.path.join(temp_video_dir, f"{timestamp}_{track_video_filename}")

                            # Decode base64 and save as video file
                            video_bytes = base64.b64decode(track_video_content)
                            with open(local_video_path, 'wb') as f:
                                f.write(video_bytes)

                            print(f"✅ Successfully saved tracking video to: {local_video_path}")
                            print(f"🔧 Video file size: {len(video_bytes)} bytes")
                        except Exception as e:
                            print(f"❌ Failed to process tracking video: {e}")
                            local_video_path = None
                    else:
                        print("⚠️ No tracking video content received from backend")

                    # Return the iframe HTML, the video path, and the HTML file path (for download)
                    return iframe_html, local_video_path, random_path
                else:
                    error_msg = result.get("error", "Unknown error") if isinstance(result, dict) else "Backend processing failed"
                    print(f"❌ Backend processing failed: {error_msg}")
                    # Fall through to error message
            except Exception as e:
                print(f"❌ Backend API call failed: {e}")
                print(f"🔧 Error type: {type(e)}")
                print(f"🔧 Error details: {str(e)}")

                # Check for specific gradio_client errors
                if "AppError" in str(type(e)):
                    print("🔧 Backend Space has internal errors (AppError)")
                    print("🔧 The backend Space code has bugs or configuration issues")
                    print("🔧 Contact the Space owner to fix the backend implementation")
                elif "Could not fetch config" in str(e):
                    print("🔧 Config fetch failed - possible Gradio version mismatch")
                    print("🔧 Frontend and backend may be using incompatible Gradio versions")
                elif "timeout" in str(e).lower():
                    print("🔧 Backend request timed out - Space might be overloaded")
                elif "Expecting value" in str(e):
                    print("🔧 JSON parsing error in backend - state format mismatch")
                    print("🔧 This happens when using local processing state with backend API")
                    print("🔧 Please upload video again to use backend processing")
                else:
                    print(f"🔧 Unexpected error type: {type(e).__name__}")
                    print("🔄 Showing error message instead of visualization...")
                # Fall through to error message

        # Create an informative error message based on the state
        state_info = ""
        try:
            if isinstance(original_image_state, str):
                parsed_state = json.loads(original_image_state)
                if "video_path" in parsed_state:
                    video_name = os.path.basename(parsed_state["video_path"])
                    state_info = f"Video: {video_name}"
        except:
            state_info = "State format unknown"

        # Fallback: show a message that backend processing is required
        error_message = f"""
        <div style='border: 3px solid #ff6b6b; border-radius: 10px; padding: 20px; background-color: #fff5f5;'>
            <h3 style='color: #d63031; margin-bottom: 15px;'>⚠️ Backend Processing Required</h3>
            <p style='color: #2d3436; line-height: 1.6;'>
                The tracking and visualization features require backend processing. The current setup is using local processing, which is incompatible with the backend API.
            </p>
            <h4 style='color: #d63031; margin: 15px 0 10px 0;'>Solutions:</h4>
            <ul style='color: #2d3436; line-height: 1.6;'>
                <li><strong>Upload video again:</strong> This will properly initialize the backend state</li>
                <li><strong>Select points on the frame:</strong> Ensure you've clicked on the object to track</li>
                <li><strong>Check backend connection:</strong> Ensure the backend Space is running</li>
                <li><strong>Use compatible state:</strong> Avoid local processing mode</li>
            </ul>
            <div style='background-color: #f8f9fa; border-radius: 5px; padding: 10px; margin-top: 15px;'>
                <p style='color: #2d3436; font-weight: bold; margin: 0 0 5px 0;'>Debug Information:</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Backend Available: {BACKEND_AVAILABLE}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Backend Client: {backend_client is not None}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Backend URL: {BACKEND_SPACE_URL}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>State Info: {state_info}</p>
                <p style='color: #666; font-size: 12px; margin: 0;'>Processing Mode: {'Backend' if BACKEND_AVAILABLE else 'Local (Limited)'}</p>
            </div>
            <div style='background-color: #e3f2fd; border-radius: 5px; padding: 10px; margin-top: 10px; border-left: 4px solid #2196f3;'>
                <p style='color: #1976d2; font-weight: bold; margin: 0 0 5px 0;'>💡 Quick Fix:</p>
                <p style='color: #1976d2; font-size: 13px; margin: 0;'>
                    Try uploading your video again - this should properly initialize the backend state for tracking.
                </p>
            </div>
        </div>
        """
        return error_message, None, None
    except Exception as e:
        print(f"Error in launch_viz: {e}")
        return None, None, None

def clear_all():
    """Clear all buffers and temporary files"""
    return (None, None, [],
            gr.update(value=50),
            gr.update(value=756),
            gr.update(value=3))

def clear_all_with_download():
    """Clear all buffers including download component"""
    return (None, None, [],
            gr.update(value=50),
            gr.update(value=756),
            gr.update(value=3),
            None)  # HTML download component

def update_tracker_model(model_name):
    """Update tracker model (placeholder function)"""
    return

def get_video_settings(video_name):
    """Get video-specific settings based on video name"""
    video_settings = {
        "kiss": (45, 700, 10),
        "backpack": (40, 600, 2),
        "kitchen": (60, 800, 3),
        "pillow": (35, 500, 2),
        "handwave": (35, 500, 8),
        "hockey": (45, 700, 2),
        "drifting": (35, 1000, 6),
        "basketball": (45, 1500, 5),
        "ken_block_0": (45, 700, 2),
        "ego_kc1": (45, 500, 4),
        "vertical_place": (45, 500, 3),
        "ego_teaser": (45, 1200, 10),
        "robot_unitree": (45, 500, 4),
        "robot_3": (35, 400, 5),
        "teleop2": (45, 256, 7),
        "pusht": (45, 256, 10),
        "cinema_0": (45, 356, 5),
        "cinema_1": (45, 756, 3),
    }
    return video_settings.get(video_name, (50, 756, 3))
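
# Illustrative lookups: settings are keyed by the uploaded file's basename (via
# get_video_name), so "examples/kiss.mp4" resolves to the "kiss" preset and any
# unknown name falls back to the defaults (grid_size=50, vo_points=756, fps=3).
#
#   get_video_settings("kiss")      # -> (45, 700, 10)
#   get_video_settings("my_video")  # -> (50, 756, 3)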

def test_backend_connection():
    """Test if backend is actually working"""
    global BACKEND_AVAILABLE
    if not backend_client:
        return False

    try:
        print("Testing backend connection with a simple call...")
        # Check if we have fns available
        if hasattr(backend_client, 'fns') and backend_client.fns:
            print("✅ Backend API functions are available")
            print(f"🔧 Available function indices: {list(backend_client.fns.keys())}")
            return True
        else:
            print("❌ Backend API functions not found")
            return False
    except Exception as e:
        print(f"❌ Backend connection test failed: {e}")
        return False

def test_backend_api():
    """Test specific backend API functions"""
    if not BACKEND_AVAILABLE or not backend_client:
        print("❌ Backend not available for testing")
        return False

    try:
        print("🧪 Testing backend API functions...")
        # Test if fns exist and show available indices
        if hasattr(backend_client, 'fns') and backend_client.fns:
            print(f"✅ Backend has {len(backend_client.fns)} functions available")
            for idx in backend_client.fns.keys():
                print(f"✅ Function {idx} is available")
        else:
            print("❌ No functions found in backend API")
            return False
        return True
    except Exception as e:
        print(f"❌ Backend API test failed: {e}")
        return False

# Initialize the backend connection
print("🚀 Initializing frontend application...")
result = initialize_backend()

# Test backend connection if available
if result and BACKEND_AVAILABLE:
    print("✅ Backend connection successful!")
else:
    print("❌ Backend connection failed!")

# Create the Gradio interface
print("🎨 Creating Gradio interface...")

with gr.Blocks(
    theme=gr.themes.Soft(),
    title="SpatialTracker V2 - Frontend",
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    .gr-button {
        margin: 5px;
    }
    .gr-form {
        background: white;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
    }
    /* Fix the height of the video upload component */
    .gr-video {
        height: 300px !important;
        min-height: 300px !important;
        max-height: 300px !important;
    }
    .gr-video video {
        height: 260px !important;
        max-height: 260px !important;
        object-fit: contain !important;
        background: #f8f9fa;
    }
    .gr-video .gr-video-player {
        height: 260px !important;
        max-height: 260px !important;
    }
    /* Horizontally scrolling example video styles */
    .example-videos .gr-examples {
        overflow: visible !important;
    }
    .example-videos .gr-examples .gr-table-wrapper {
        overflow-x: auto !important;
        overflow-y: hidden !important;
        scrollbar-width: thin;
        scrollbar-color: #667eea #f1f1f1;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar {
        height: 8px;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar-track {
        background: #f1f1f1;
        border-radius: 4px;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 4px;
    }
    .example-videos .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb:hover {
        background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
    }
    .example-videos .gr-examples .gr-table {
        display: flex !important;
        flex-wrap: nowrap !important;
        min-width: max-content !important;
        gap: 10px !important;
    }
    .example-videos .gr-examples .gr-table tbody {
        display: flex !important;
        flex-direction: row !important;
        flex-wrap: nowrap !important;
        gap: 10px !important;
    }
    .example-videos .gr-examples .gr-table tbody tr {
        display: flex !important;
        flex-direction: column !important;
        min-width: 120px !important;
        max-width: 120px !important;
        margin: 0 !important;
        background: white;
        border-radius: 8px;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        transition: all 0.3s ease;
        cursor: pointer;
    }
    .example-videos .gr-examples .gr-table tbody tr:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(102, 126, 234, 0.2);
    }
    .example-videos .gr-examples .gr-table tbody tr td {
        text-align: center !important;
        padding: 8px !important;
        border: none !important;
    }
    .example-videos .gr-examples .gr-table tbody tr td video {
        border-radius: 6px !important;
        width: 100% !important;
        height: auto !important;
    }
    .example-videos .gr-examples .gr-table tbody tr td:last-child {
        font-size: 12px !important;
        font-weight: 500 !important;
        color: #333 !important;
        padding-top: 4px !important;
    }
    /* New horizontally scrolling example video styles */
    .horizontal-examples .gr-examples {
        overflow: visible !important;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper {
        overflow-x: auto !important;
        overflow-y: hidden !important;
        scrollbar-width: thin;
        scrollbar-color: #667eea #f1f1f1;
        padding: 10px 0;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar {
        height: 8px;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar-track {
        background: #f1f1f1;
        border-radius: 4px;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 4px;
    }
    .horizontal-examples .gr-examples .gr-table-wrapper::-webkit-scrollbar-thumb:hover {
        background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
    }
    .horizontal-examples .gr-examples .gr-table {
        display: flex !important;
        flex-wrap: nowrap !important;
        min-width: max-content !important;
        gap: 15px !important;
        padding-bottom: 10px;
    }
    .horizontal-examples .gr-examples .gr-table tbody {
        display: flex !important;
        flex-direction: row !important;
        flex-wrap: nowrap !important;
        gap: 15px !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr {
        display: flex !important;
        flex-direction: column !important;
        min-width: 160px !important;
        max-width: 160px !important;
        margin: 0 !important;
        background: white;
        border-radius: 12px;
        box-shadow: 0 3px 12px rgba(0,0,0,0.12);
        transition: all 0.3s ease;
        cursor: pointer;
        overflow: hidden;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr:hover {
        transform: translateY(-4px);
        box-shadow: 0 8px 20px rgba(102, 126, 234, 0.25);
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td {
        text-align: center !important;
        padding: 0 !important;
        border: none !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td:first-child {
        padding: 0 !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td video {
        border-radius: 8px 8px 0 0 !important;
        width: 100% !important;
        height: 90px !important;
        object-fit: cover !important;
    }
    .horizontal-examples .gr-examples .gr-table tbody tr td:last-child {
        font-size: 11px !important;
        font-weight: 600 !important;
        color: #333 !important;
        padding: 8px 12px !important;
        background: linear-gradient(135deg, #f8f9ff 0%, #e6f3ff 100%);
        border-radius: 0 0 8px 8px;
    }
    """
) as demo:
    gr.Markdown("""
# 🎯 SpatialTracker V2 - Frontend Interface

Welcome to SpatialTracker V2! This interface allows you to track any pixels in 3D using our model.

**Instructions:**
1. Upload a video file or select from the examples below
2. Click on the object you want to track in the first frame
3. Adjust the tracking parameters if needed
4. Click "Launch Visualization" to start tracking
    """)

    # Status indicator with more detailed information
    if BACKEND_AVAILABLE:
        status_text = "🟢 Backend Connected"
        status_details = f"Connected to: {BACKEND_SPACE_URL}"
    else:
        status_text = "🟡 Running in Standalone Mode"
        status_details = f"Backend unavailable: {BACKEND_SPACE_URL}"

    gr.Markdown(f"**Status:** {status_text}")
    gr.Markdown(f"<small style='color: #666;'>{status_details}</small>", elem_id="status-details")

    # Example videos section - moved to top
    with gr.Group(elem_classes=["example-videos"]):
        gr.Markdown("### 📂 Example Videos")
        gr.Markdown("Try these example videos to get started quickly:")

        # Custom horizontal scrolling video gallery
        gr.HTML("""
        <div style='background-color: #f8f9ff; border-radius: 8px; padding: 10px; margin: 10px 0; border-left: 4px solid #667eea;'>
            <p style='margin: 0; font-size: 13px; color: #666; display: flex; align-items: center; gap: 8px;'>
                <span style='font-size: 16px;'>💡</span>
                <strong>Tip:</strong> Scroll horizontally below to see all example videos
            </p>
        </div>
        """)

        # Define video_input here so it can be referenced in examples
        video_input = gr.Video(
            label="Upload Video or Select Example",
            format="mp4",
            height=300
        )

        # Create a horizontal scrolling container for the examples
        with gr.Group(elem_classes=["horizontal-examples"]):
            gr.Examples(
                examples=[
                    ["examples/kiss.mp4"],
                    ["examples/backpack.mp4"],
                    ["examples/pillow.mp4"],
                    ["examples/handwave.mp4"],
                    ["examples/hockey.mp4"],
                    ["examples/drifting.mp4"],
                    ["examples/ken_block_0.mp4"],
                    ["examples/kitchen.mp4"],
                    ["examples/basketball.mp4"],
                    ["examples/ego_kc1.mp4"],
                    ["examples/vertical_place.mp4"],
                    ["examples/ego_teaser.mp4"],
                    ["examples/robot_unitree.mp4"],
                    ["examples/robot_3.mp4"],
                    ["examples/teleop2.mp4"],
                    ["examples/pusht.mp4"],
                    ["examples/cinema_0.mp4"],
                    ["examples/cinema_1.mp4"],
                ],
                inputs=video_input,
                label="🎬 Click on any example to load it",
                examples_per_page=16
            )

    with gr.Row():
        with gr.Column(scale=1):
            # Interactive frame display
            with gr.Group():
                gr.Markdown("### 🎯 Point Selection")
                gr.Markdown("Click on the object you want to track in the frame below:")

                interactive_frame = gr.Image(
                    label="Click to select tracking points",
                    type="numpy",
                    interactive=True
                )

                with gr.Row():
                    point_type = gr.Radio(
                        choices=["positive_point", "negative_point"],
                        value="positive_point",
                        label="Point Type",
                        info="Positive points indicate the object to track, negative points indicate areas to avoid"
                    )

                with gr.Row():
                    reset_points_btn = gr.Button("🔄 Reset Points", variant="secondary")
                    clear_all_btn = gr.Button("🗑️ Clear All", variant="stop")

        with gr.Column(scale=1):
            # Tracking results
            with gr.Group():
                gr.Markdown("### 🎬 Tracking Results")
                tracking_result_video = gr.Video(
                    label="Tracking Result Video",
                    interactive=False,
                    height=300
                )

                # HTML file download component
                html_download = gr.File(
                    label="📥 Download 3D Visualization HTML",
                    interactive=False,
                    visible=True
                )

    # Advanced settings section - changed to open=True
    with gr.Accordion("⚙️ Advanced Settings", open=True):
        gr.Markdown("Adjust these parameters to optimize tracking performance:")
        with gr.Row():
            grid_size = gr.Slider(
                minimum=10,
                maximum=100,
                step=10,
                value=50,
                label="Grid Size",
                info="Size of the tracking grid (larger = more detailed)"
            )
            vo_points = gr.Slider(
                minimum=100,
                maximum=2000,
                step=50,
                value=756,
                label="VO Points",
                info="Number of visual odometry points (more = better accuracy)"
            )
            fps = gr.Slider(
                minimum=1,
                maximum=30,
                step=1,
                value=3,
                label="FPS",
                info="Frames per second for processing (higher = smoother but slower)"
            )

    # Launch button
    with gr.Row():
        launch_btn = gr.Button("🚀 Start Tracking Now!", variant="primary", size="lg")

    # 3D visualization - make it larger and more prominent
    with gr.Row():
        with gr.Column():
            with gr.Group():
                gr.Markdown("### 🌐 3D Trajectory Visualization")
                gr.Markdown("Interactive 3D visualization of point tracking and camera motion:")
                viz_html = gr.HTML(
                    label="3D Trajectory Visualization",
                    value="""
                    <div style='border: 3px solid #667eea; border-radius: 15px; padding: 40px;
                                background: linear-gradient(135deg, #f8f9ff 0%, #e6f3ff 100%);
                                text-align: center; min-height: 600px; display: flex;
                                flex-direction: column; justify-content: center; align-items: center;
                                box-shadow: 0 8px 32px rgba(102, 126, 234, 0.2);'>
                        <div style='font-size: 48px; margin-bottom: 20px;'>🌐</div>
                        <h2 style='color: #667eea; margin-bottom: 15px; font-size: 28px; font-weight: 600;'>
                            3D Trajectory Visualization
                        </h2>
                        <p style='color: #666; font-size: 16px; line-height: 1.6; max-width: 500px; margin-bottom: 25px;'>
                            Perceive the world with Pixel-wise 3D Motions!
                        </p>
                        <div style='background: rgba(102, 126, 234, 0.1); border-radius: 25px;
                                    padding: 12px 24px; border: 2px solid rgba(102, 126, 234, 0.2);'>
                            <span style='color: #667eea; font-weight: 600; font-size: 14px;'>
                                ⚡ Powered by SpatialTracker V2
                            </span>
                        </div>
                    </div>
                    """,
                    elem_id="viz_container"
                )

    # Hidden state variables
    original_image_state = gr.State(None)
    selected_points = gr.State([])

    # Event handlers
    video_input.change(
        fn=handle_video_upload,
        inputs=[video_input],
        outputs=[original_image_state, interactive_frame, selected_points, grid_size, vo_points, fps]
    )

    interactive_frame.select(
        fn=select_point,
        inputs=[original_image_state, selected_points, point_type],
        outputs=[interactive_frame, selected_points]
    )

    reset_points_btn.click(
        fn=reset_points,
        inputs=[original_image_state, selected_points],
        outputs=[interactive_frame, selected_points]
    )

    clear_all_btn.click(
        fn=clear_all_with_download,
        outputs=[video_input, interactive_frame, selected_points, grid_size, vo_points, fps, html_download]
    )

    launch_btn.click(
        fn=launch_viz,
        inputs=[grid_size, vo_points, fps, original_image_state],
        outputs=[viz_html, tracking_result_video, html_download]
    )

    # GitHub star reminder
    gr.HTML("""
    <div style='background: linear-gradient(135deg, #e8eaff 0%, #f0f2ff 100%);
                border-radius: 10px;
                padding: 15px;
                margin: 15px 0;
                box-shadow: 0 2px 8px rgba(102, 126, 234, 0.1);
                border: 1px solid rgba(102, 126, 234, 0.15);'>
        <div style='text-align: center; color: #4a5568;'>
            <h3 style='margin: 0 0 10px 0; font-size: 18px; text-shadow: none; color: #2d3748;'>
                ⭐ Love SpatialTracker? Give us a Star! ⭐
            </h3>
            <p style='margin: 0 0 12px 0; font-size: 14px; opacity: 0.8; color: #4a5568;'>
                Help us grow by starring our repository on GitHub! 🚀
            </p>
            <div style='display: flex; justify-content: center;'>
                <a href="https://github.com/henry123-boy/SpaTrackerV2"
                   target="_blank"
                   style='display: inline-flex;
                          align-items: center;
                          gap: 6px;
                          background: rgba(102, 126, 234, 0.1);
                          color: #4a5568;
                          padding: 8px 16px;
                          border-radius: 20px;
                          text-decoration: none;
                          font-weight: bold;
                          font-size: 14px;
                          backdrop-filter: blur(5px);
                          border: 1px solid rgba(102, 126, 234, 0.2);
                          transition: all 0.3s ease;'
                   onmouseover="this.style.background='rgba(102, 126, 234, 0.15)'; this.style.transform='translateY(-1px)'"
                   onmouseout="this.style.background='rgba(102, 126, 234, 0.1)'; this.style.transform='translateY(0)'">
                    <span style='font-size: 16px;'>⭐</span>
                    Star on GitHub
                </a>
            </div>
        </div>
    </div>
    """)

    # Acknowledgment section for TAPIP3D - placed at the end
    gr.HTML("""
    <div style='background: linear-gradient(135deg, #fff8e1 0%, #fffbf0 100%);
                border-radius: 8px;
                padding: 12px;
                margin: 15px 0;
                box-shadow: 0 1px 4px rgba(255, 193, 7, 0.1);
                border: 1px solid rgba(255, 193, 7, 0.2);'>
        <div style='text-align: center; color: #5d4037;'>
            <h5 style='margin: 0 0 6px 0; font-size: 14px; color: #5d4037;'>
                Acknowledgments
            </h5>
            <p style='margin: 0; font-size: 12px; opacity: 0.9; color: #5d4037; line-height: 1.3;'>
                Our 3D visualizer is adapted from <strong>TAPIP3D</strong>. We thank the authors for their excellent work!
            </p>
            <div style='margin-top: 6px;'>
                <a href="https://github.com/zbw001/TAPIP3D"
                   target="_blank"
                   style='display: inline-flex;
                          align-items: center;
                          gap: 3px;
                          background: rgba(255, 193, 7, 0.15);
                          color: #5d4037;
                          padding: 3px 10px;
                          border-radius: 12px;
                          text-decoration: none;
                          font-weight: 500;
                          font-size: 11px;
                          border: 1px solid rgba(255, 193, 7, 0.3);
                          transition: all 0.3s ease;'
                   onmouseover="this.style.background='rgba(255, 193, 7, 0.2)'"
                   onmouseout="this.style.background='rgba(255, 193, 7, 0.15)'">
                    📚 TAPIP3D Repository
                </a>
            </div>
        </div>
    </div>
    """)

# Launch the interface
if __name__ == "__main__":
    print("🌟 Launching SpatialTracker V2 Frontend...")
    print(f"🔗 Backend Status: {'Connected' if BACKEND_AVAILABLE else 'Disconnected'}")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True,
        show_error=True
    )