Spaces:

justinkay
/

coda

Running

coda / cleanup_deleted_images.py

justinkay

Cleanup script

e7063f6 about 2 months ago

2.99 kB

	#!/usr/bin/env python3
"""
Clean up references to deleted images.

Any image listed in ``images.txt`` that is no longer present in the
``iwildcam_demo_images`` directory is removed from:

- iwildcam_demo_annotations.json
- iwildcam_demo.pt
- iwildcam_demo_labels.pt
- images.txt

The position of a name in ``images.txt`` is used as its index into both
tensor files, so the four files must stay in lockstep.  To keep a failure
from leaving them out of sync, every input is read and validated first, the
new contents are staged in temporary files, and only then are the originals
replaced.
"""

import json
import os
import torch

IMAGE_DIR = "iwildcam_demo_images"
IMAGES_TXT = "images.txt"
DEMO_PT = "iwildcam_demo.pt"
LABELS_PT = "iwildcam_demo_labels.pt"
ANNOTATIONS_JSON = "iwildcam_demo_annotations.json"

# Suffix for staged output files; they are renamed over the originals at the end.
TMP_SUFFIX = ".tmp"


def read_image_list(path):
    """Return the stripped lines of *path*, without trailing blank lines.

    Interior blank lines are kept because each line position doubles as a
    tensor index; only trailing blanks (which can never name an image) are
    dropped so they do not trip the alignment check.
    """
    with open(path, "r") as f:
        names = [line.strip() for line in f]
    while names and not names[-1]:
        names.pop()
    return names


def select_existing(current_images, existing_images):
    """Split the listed images into those that still exist on disk.

    Args:
        current_images: image names in ``images.txt`` order.
        existing_images: container of file names present in the image directory.

    Returns:
        ``(valid_images, valid_indices)`` - the surviving names, and their
        positions in *current_images*, both in the original order.
    """
    kept = [
        (idx, img)
        for idx, img in enumerate(current_images)
        if img in existing_images
    ]
    valid_images = [img for _, img in kept]
    valid_indices = [idx for idx, _ in kept]
    return valid_images, valid_indices


def check_alignment(num_listed, demo_tensors, demo_labels):
    """Verify both tensors have exactly one slot per ``images.txt`` entry.

    The script indexes dimension 1 of *demo_tensors* and dimension 0 of
    *demo_labels* with positions taken from ``images.txt``.  If the sizes
    disagree (for example after an earlier interrupted run), those positions
    would select the wrong rows, so refuse to continue.

    Raises:
        ValueError: if either size differs from *num_listed*.
    """
    n_demo = demo_tensors.shape[1]
    n_labels = demo_labels.shape[0]
    if n_demo != num_listed or n_labels != num_listed:
        raise ValueError(
            f"images.txt lists {num_listed} images but iwildcam_demo.pt has "
            f"{n_demo} and iwildcam_demo_labels.pt has {n_labels}; "
            "refusing to filter misaligned data"
        )


def filter_tensors(demo_tensors, demo_labels, valid_indices):
    """Keep only the entries at *valid_indices*.

    *demo_tensors* is filtered along dimension 1 (the original script notes
    its shape as ``[3, N, 5]``, N being the number of images) and
    *demo_labels* along dimension 0.

    Returns:
        ``(filtered_demo, filtered_labels)`` as new tensors.
    """
    # An explicit long index keeps an empty selection well-defined.
    index = torch.tensor(valid_indices, dtype=torch.long)
    return demo_tensors[:, index, :], demo_labels[index]


def filter_annotations(annotations, existing_images):
    """Drop JSON records that refer to images no longer on disk (in place).

    ``annotations["images"]`` is filtered by ``file_name``; then, if present,
    ``annotations["annotations"]`` is filtered to records whose ``image_id``
    belongs to a surviving image.  When there is no ``"images"`` key the
    surviving ids cannot be determined, so nothing is changed.

    Returns:
        dict mapping each filtered key to a ``(before, after)`` count pair.
    """
    counts = {}
    if "images" not in annotations:
        return counts

    all_images = annotations["images"]
    kept_images = [
        img for img in all_images if img["file_name"] in existing_images
    ]
    annotations["images"] = kept_images
    counts["images"] = (len(all_images), len(kept_images))

    if "annotations" in annotations:
        # Ids of the images that survived the filter above.
        valid_image_ids = {img["id"] for img in kept_images}
        all_anns = annotations["annotations"]
        kept_anns = [
            ann for ann in all_anns if ann["image_id"] in valid_image_ids
        ]
        annotations["annotations"] = kept_anns
        counts["annotations"] = (len(all_anns), len(kept_anns))

    return counts


def main():
    """Run the cleanup against the files in the current working directory."""
    # ---- Phase 1: read and validate everything; no file is modified yet ----
    existing_images = set(os.listdir(IMAGE_DIR))
    print(f"Found {len(existing_images)} existing images")

    current_images = read_image_list(IMAGES_TXT)
    print(f"Found {len(current_images)} images in images.txt")

    valid_images, valid_indices = select_existing(current_images, existing_images)
    print(f"Keeping {len(valid_images)} images")
    print(f"Removing {len(current_images) - len(valid_images)} images")

    # NOTE(review): torch.load unpickles its input; only run this on trusted files.
    demo_tensors = torch.load(DEMO_PT)
    demo_labels = torch.load(LABELS_PT)
    print(f"Original iwildcam_demo.pt shape: {demo_tensors.shape}")
    print(f"Original iwildcam_demo_labels.pt shape: {demo_labels.shape}")
    check_alignment(len(current_images), demo_tensors, demo_labels)

    with open(ANNOTATIONS_JSON, "r") as f:
        annotations = json.load(f)

    # ---- Phase 2: compute the new contents in memory ----
    filtered_demo, filtered_labels = filter_tensors(
        demo_tensors, demo_labels, valid_indices
    )
    json_counts = filter_annotations(annotations, existing_images)

    # ---- Phase 3: stage to temp files, then swap them in ----
    staged = [
        (path + TMP_SUFFIX, path)
        for path in (IMAGES_TXT, DEMO_PT, LABELS_PT, ANNOTATIONS_JSON)
    ]
    tmp_images, tmp_demo, tmp_labels, tmp_json = (tmp for tmp, _ in staged)
    try:
        with open(tmp_images, "w") as f:
            f.writelines(f"{img}\n" for img in valid_images)
        torch.save(filtered_demo, tmp_demo)
        torch.save(filtered_labels, tmp_labels)
        with open(tmp_json, "w") as f:
            json.dump(annotations, f, indent=2)

        # All serialization succeeded; replacing is now just four renames.
        for tmp_path, final_path in staged:
            os.replace(tmp_path, final_path)
    finally:
        # After a successful commit no temp file remains; after a failure
        # remove the leftovers so the originals stay the only copies.
        for tmp_path, _ in staged:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    print("Updated images.txt")
    print(f"Updated iwildcam_demo.pt: {demo_tensors.shape} -> {filtered_demo.shape}")
    print(f"Updated iwildcam_demo_labels.pt: {demo_labels.shape} -> {filtered_labels.shape}")
    for key, (before, after) in json_counts.items():
        print(f"Updated JSON {key}: {before} -> {after}")
    print("Updated iwildcam_demo_annotations.json")

    print("\nCleanup complete!")


if __name__ == "__main__":
    main()