Spaces:

Yuxihenry
/

SpatialTrackerV2

Running on Zero

SpatialTrackerV2 / models /vggt /datasets /spring.py

xiaoyuxi

vggt_da

e43b66a 6 months ago

6.94 kB

	import os.path as osp
	import os
	import sys
	import json
	import itertools
	import time
	from collections import deque
	import torch
	import tqdm
	import concurrent.futures
	import psutil
	import io
	import cv2

	from PIL import Image
	import numpy as np
	from models.SpaTrackV2.models.utils import matrix_to_quaternion

	from models.SpaTrackV2.datasets.base_sfm_dataset import BaseSfMViewDataset
	from models.SpaTrackV2.models.utils import (
	camera_to_pose_encoding, pose_encoding_to_camera
	)
	from models.SpaTrackV2.models.camera_transform import normalize_cameras
	from models.SpaTrackV2.datasets.tartan_utils.traj_tf import ned2cam
	from models.SpaTrackV2.datasets.tartan_utils.cam_tf import pos_quats2SE_matrices
	from models.SpaTrackV2.utils.visualizer import Visualizer
	import glob

	from models.SpaTrackV2.datasets.dataset_util import (
	imread_cv2, npz_loader, read_video,npy_loader,resize_crop_video
	)

	class Spring(BaseSfMViewDataset):
	def __init__(self, args, ROOT, *kwargs):
	self.ROOT = ROOT
	super().__init__(args, *kwargs)
	scene_list = os.listdir(self.ROOT)
	self.scene_list = sorted([osp.join(self.ROOT, scene) for scene in scene_list])
	self.except_list = [6]
	self.scene_list = [self.scene_list[i] for i in range(len(self.scene_list)) if i not in self.except_list]
	def __len__(self):
	return len(self.scene_list)

	def _get_views(self, idx, resolution, rng):

	# scene root dir
	scene = self.scene_list[idx]
	img_len = len(glob.glob(os.path.join(scene, "rgb", "*.png")))

	try:
	sclae_num = int(np.random.uniform(2, 3))
	start = np.random.choice(np.arange(0, max(img_len - sclae_num*self.num_views, 1)))
	img_idxs = np.arange(start, start+sclae_num*self.num_views, sclae_num).clip(0, img_len-1)
	except Exception:
	img_idxs = np.arange(0, img_len, 1)

	frame_dir = os.path.join(scene, "rgb")
	depth_dir = os.path.join(scene, "depth")
	cam_dir = os.path.join(scene, "cam")
	rgbs = []
	depths = []
	extrs = []
	intrs = []
	# read the frames and depths
	for idx_i in img_idxs:
	idx_i = int(idx_i)
	img_path = os.path.join(frame_dir, f"{idx_i:04d}.png")
	depth_path = os.path.join(depth_dir, f"{idx_i:04d}.npy")
	# read depth and image
	img = imread_cv2(img_path)
	depth = npy_loader(depth_path)
	depth[~np.isfinite(depth)] = 0 # invalid
	rgbs.append(img)
	depths.append(depth)
	# read extrinsics and intrinsics
	cam = np.load(osp.join(cam_dir, f"{idx_i:04d}.npz"))
	extrs.append(cam["pose"])
	intrs.append(cam["intrinsics"])
	rgbs = np.stack(rgbs, axis=0) # (T, H, W, C)
	depths = np.stack(depths, axis=0) # (T, H, W)
	extrs = np.stack(extrs, axis=0) # (T, 4, 4)
	intrs = np.stack(intrs, axis=0) # (T, 3, 3)
	# convert BGR to RGB
	# rgbs = rgbs[..., [2, 1, 0]]
	depths = depths
	T, H, W, _ = rgbs.shape

	# convert them into numpy array
	intrinsics = intrs
	extrinsics = extrs

	# augumentation
	# change the sequence of the frames
	# create the track annotations
	# print("The shape of traj_3d is not correct")
	traj_2d = np.zeros((self.num_views, self.track_num, 2))
	traj_3d = np.zeros((self.num_views, self.track_num, 3))
	vis = np.zeros((self.num_views, self.track_num))

	poses = extrinsics


	# get tensor track
	traj_2d = torch.from_numpy(traj_2d)
	traj_3d = torch.from_numpy(traj_3d)
	vis = torch.from_numpy(vis)

	# crop and resize
	rgbs, depths, Intrs = resize_crop_video(rgbs, depths, intrinsics, resolution[0])

	# encode the camera poses
	Extrs = torch.from_numpy(poses)
	camera_poses = Extrs #NOTE: C2W

	focal0 = Intrs[:, 0, 0] / resolution[0]
	focal1 = Intrs[:, 1, 1] / resolution[0]
	focal = (focal0.unsqueeze(1)+focal1.unsqueeze(1))/2
	# first frame normalize
	camera_poses = torch.inverse(camera_poses[:1]) @ camera_poses
	T_center = camera_poses[:, :3, 3].mean(dim=0)
	Radius = (camera_poses[:, :3, 3].norm(dim=1).max())
	# if Radius < 1e-2:
	Radius = 1
	camera_poses[:, :3, 3] = (camera_poses[:, :3, 3])/Radius
	R = camera_poses[:, :3, :3]
	t = camera_poses[:, :3, 3]
	rot_vec = matrix_to_quaternion(R)
	pose_enc = torch.cat([t, rot_vec, focal], dim=1)
	# depth_cano = Radius*focal[:,:,None,None] / depths.clamp(min=1e-6)
	depth_cano = depths / Radius

	#TODO: DEBUG depth range
	# metric_depth = depth_cano.clone()
	# metric_depth[metric_depth == torch.inf] = 0
	# _depths = metric_depth[metric_depth > 0].reshape(-1)
	# q25 = torch.kthvalue(_depths, int(0.25 * len(_depths))).values
	# q75 = torch.kthvalue(_depths, int(0.75 * len(_depths))).values
	# iqr = q75 - q25
	# upper_bound = (q75 + 0.8iqr).clamp(min=1e-6, max=10q25)
	# _depth_roi = torch.tensor(
	# [1e-1, upper_bound.item()],
	# dtype=metric_depth.dtype,
	# device=metric_depth.device
	# )
	# mask_roi = (metric_depth > _depth_roi[0]) & (metric_depth < _depth_roi[1])
	# depth_cano = depth_cano * mask_roi.float()


	depth_cano[depth_cano==torch.nan] = 0

	traj_3d = torch.zeros(self.num_views, self.track_num, 3)
	vis = torch.zeros(self.num_views, self.track_num)

	syn_real = torch.tensor([1])
	metric_rel = torch.tensor([1])
	data_dir = scene
	views = dict(
	rgbs=rgbs,
	depths=depth_cano,
	pose_enc=pose_enc,
	traj_mat=camera_poses,
	intrs=Intrs,
	traj_3d=traj_3d,
	vis=vis,
	syn_real=syn_real,
	metric_rel=metric_rel,
	data_dir=data_dir
	)

	return views


	if __name__ == "__main__":
	from models.SpaTrackV2.datasets.base_sfm_dataset import view_name
	from functools import partial
	import shutil
	DATA_DIR = "/mnt/bn/xyxdata/data/4d_data/spring/processed"

	dataset = Spring(split='train', ROOT=DATA_DIR,
	resolution=518, aug_crop=16, num_views=32)

	rng = np.random.default_rng(seed=0)
	data_ret = dataset._get_views(6,(518,518),rng)

	from models.SpaTrackV2.datasets.vis3d_check import vis4d
	vis4d(data_ret["rgbs"], data_ret["depths"],
	data_ret["traj_mat"], data_ret["intrs"], track3d=data_ret["traj_3d"],
	workspace="/mnt/bn/xyxdata/home/codes/my_projs/SpaTrack2/viser_result/test")
	import pdb; pdb.set_trace()